-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSummarizeTaxa_function.R
More file actions
113 lines (108 loc) · 4.04 KB
/
SummarizeTaxa_function.R
File metadata and controls
113 lines (108 loc) · 4.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#### Function to summarize by taxa
### From https://github.com/joey711/phyloseq/issues/418
library("phyloseq")
library("data.table")
library("ggplot2")
fast_melt = function(physeq){
# supports "naked" otu_table as `physeq` input.
otutab = as(otu_table(physeq), "matrix")
if(!taxa_are_rows(physeq)){otutab <- t(otutab)}
otudt = data.table(otutab, keep.rownames = TRUE)
setnames(otudt, "rn", "taxaID")
# Enforce character taxaID key
otudt[, taxaIDchar := as.character(taxaID)]
otudt[, taxaID := NULL]
setnames(otudt, "taxaIDchar", "taxaID")
# Melt count table
mdt = melt.data.table(otudt,
id.vars = "taxaID",
variable.name = "SampleID",
value.name = "count")
# Remove zeroes, NAs
mdt <- mdt[count > 0][!is.na(count)]
# Calculate relative abundance
mdt[, RelativeAbundance := count / sum(count), by = SampleID]
if(!is.null(tax_table(physeq, errorIfNULL = FALSE))){
# If there is a tax_table, join with it. Otherwise, skip this join.
taxdt = data.table(as(tax_table(physeq, errorIfNULL = TRUE), "matrix"), keep.rownames = TRUE)
setnames(taxdt, "rn", "taxaID")
# Enforce character taxaID key
taxdt[, taxaIDchar := as.character(taxaID)]
taxdt[, taxaID := NULL]
setnames(taxdt, "taxaIDchar", "taxaID")
# Join with tax table
setkey(taxdt, "taxaID")
setkey(mdt, "taxaID")
mdt <- taxdt[mdt]
}
return(mdt)
}
summarize_taxa = function(physeq, Rank, GroupBy = NULL){
Rank <- Rank[1]
if(!Rank %in% rank_names(physeq)){
message("The argument to `Rank` was:\n", Rank,
"\nBut it was not found among taxonomic ranks:\n",
paste0(rank_names(physeq), collapse = ", "), "\n",
"Please check the list shown above and try again.")
}
if(!is.null(GroupBy)){
GroupBy <- GroupBy[1]
if(!GroupBy %in% sample_variables(physeq)){
message("The argument to `GroupBy` was:\n", GroupBy,
"\nBut it was not found among sample variables:\n",
paste0(sample_variables(physeq), collapse = ", "), "\n",
"Please check the list shown above and try again.")
}
}
# Start with fast melt
mdt = fast_melt(physeq)
if(!is.null(GroupBy)){
# Add the variable indicated in `GroupBy`, if provided.
sdt = data.table(SampleID = sample_names(physeq),
var1 = get_variable(physeq, GroupBy))
setnames(sdt, "var1", GroupBy)
# Join
setkey(sdt, SampleID)
setkey(mdt, SampleID)
mdt <- sdt[mdt]
}
# Summarize
# Summarize
Nsamples = nsamples(physeq)
summarydt = mdt[, list(meanRA = sum(RelativeAbundance)/Nsamples,
sdRA = sd(RelativeAbundance),
minRA = min(RelativeAbundance),
maxRA = max(RelativeAbundance)),
by = c(Rank, GroupBy)]
return(summarydt)
}
plot_taxa_summary = function(physeq, Rank, GroupBy = NULL){
# Get taxa summary table
dt1 = summarize_taxa(physeq, Rank = Rank, GroupBy = GroupBy)
# Set factor appropriately for plotting
RankCol = which(colnames(dt1) == Rank)
setorder(dt1, -meanRA)
dt1[, RankFac := factor(dt1[[Rank]],
levels = rev(dt1[[Rank]]))]
dt1[, ebarMax := max(c(0, min(meanRA + sdRA))), by = eval(Rank)]
dt1[, ebarMin := max(c(0, min(meanRA - sdRA))), by = eval(Rank)]
# Set zeroes to one-tenth the smallest value
ebarMinFloor = dt1[(ebarMin > 0), min(ebarMin)]
ebarMinFloor <- ebarMinFloor / 10
dt1[(ebarMin == 0), ebarMin := ebarMinFloor]
pRank = ggplot(dt1, aes(x = meanRA, y = RankFac)) +
scale_x_log10() +
xlab("Mean Relative Abundance") +
ylab(Rank) +
theme_bw()
if(!is.null(GroupBy)){
# pRank <- pRank + facet_wrap(facets = as.formula(paste("~", GroupBy)))
pRank <- pRank + geom_point(mapping = aes_string(colour = GroupBy),
size = 5)
} else {
# Don't include error bars for faceted version
pRank <- pRank + geom_errorbarh(aes(xmax = ebarMax,
xmin = ebarMin))
}
return(pRank)
}