-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBioMartGOFilter.Nfurzeri.R
More file actions
148 lines (134 loc) · 6.93 KB
/
Copy pathBioMartGOFilter.Nfurzeri.R
File metadata and controls
148 lines (134 loc) · 6.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# Build a gene -> GO ID list for the "nfurzeri_gene_ensembl" BioMart dataset,
# optionally extended with GO IDs carried over from homologous genes of other
# species' datasets.
#
# Arguments:
#   GO.CSV                          Single string of comma-separated GO IDs,
#                                   passed to BioMart's "go_parent_term" filter.
#   IncludeChildren                 If TRUE, every ID in GO.CSV is expanded with
#                                   ontologyIndex::get_descendants() (the ID
#                                   itself is kept) before querying.
#   AllowedOrthologyConfidenceCodes Accepted values of the
#                                   "nfurzeri_homolog_orthology_confidence"
#                                   attribute; forwarded to
#                                   TranslateGOList.Nfurzeri().
#   SafeOrthologyTypes              Accepted values of the
#                                   "nfurzeri_homolog_orthology_type" attribute;
#                                   forwarded to TranslateGOList.Nfurzeri().
#   Combine<Species>Homology        One flag per supported dataset; when true,
#                                   that dataset is queried and its annotations
#                                   are merged in.
#
# Returns:
#   The list produced by CompileGOList() for the killifish table, after being
#   passed through TranslateGOList.Nfurzeri() once per selected species.
#
# Side effects: attaches biomaRt, retry and ontologyIndex, and clears the
# biomaRt cache on every call.
BioMartGOFilter.Nfurzeri <- function(GO.CSV,
                                     IncludeChildren = TRUE,
                                     AllowedOrthologyConfidenceCodes = c("0", "1"),
                                     SafeOrthologyTypes = c("ortholog_one2one", "ortholog_one2many"),
                                     CombineFruitFlyHomology = FALSE,
                                     CombineHumanHomology = TRUE,
                                     CombineMedakaHomology = FALSE,
                                     CombineMouseHomology = TRUE,
                                     CombineNematodeHomology = FALSE,
                                     CombineXenopusHomology = FALSE,
                                     CombineZebrafishHomology = TRUE) {
  suppressPackageStartupMessages(library("biomaRt"))
  suppressPackageStartupMessages(library("retry"))
  suppressPackageStartupMessages(library("ontologyIndex"))
  biomartCacheClear()

  if (IncludeChildren) {
    Ontology <- get.Ontology()
    GO.Vector <- unlist(strsplit(GO.CSV, split = ","))
    # lapply() simply yields nothing for an empty GO.Vector, whereas
    # `1 : length(GO.Vector)` would have iterated over c(1, 0).
    GO.Vector.Complemented <- unlist(
      lapply(GO.Vector, function(Term) {
        get_descendants(Ontology, Term, exclude_roots = FALSE)
      }),
      use.names = FALSE
    )
    GO.CSV <- paste0(unique(GO.Vector.Complemented), collapse = ",")
  }

  # Dataset name -> caller's flag, in the order the datasets are processed.
  HomologyFlags <- list(
    dmelanogaster_gene_ensembl = CombineFruitFlyHomology,
    hsapiens_gene_ensembl = CombineHumanHomology,
    olatipes_gene_ensembl = CombineMedakaHomology,
    mmusculus_gene_ensembl = CombineMouseHomology,
    celegans_gene_ensembl = CombineNematodeHomology,
    xtropicalis_gene_ensembl = CombineXenopusHomology,
    drerio_gene_ensembl = CombineZebrafishHomology
  )
  # Each flag goes through a real `if` so that it is interpreted (and rejected
  # when NA) exactly like a plain `if (Flag)` statement would.
  IsSelected <- vapply(
    HomologyFlags,
    function(Flag) if (Flag) TRUE else FALSE,
    logical(1)
  )
  SpeciesDatasetNameList <- names(HomologyFlags)[IsSelected]

  # Run one getBM() query against `DatasetName` and return the resulting table
  # with default row names. The query (including the useEnsembl() connection)
  # is wrapped in retry(); no attempt limit is passed, and any console output
  # captured during the query is treated as a failure that triggers another
  # attempt.
  QueryBioMart <- function(DatasetName, Attributes, Filters, Values) {
    retry({
      ConsoleOutput <- capture.output({
        ResultTable <-
          getBM(attributes = Attributes,
                filters = Filters, values = Values,
                mart = useEnsembl(biomart = "ensembl", dataset = DatasetName))
      })
      if (length(ConsoleOutput) != 0) {
        stop("Error")
      }
    }, when = ".*", silent = TRUE)
    row.names(ResultTable) <- NULL
    ResultTable
  }

  GOAttributes <- c("ensembl_gene_id", "external_gene_name", "go_id")
  HomologyAttributes <- c("ensembl_gene_id", "external_gene_name",
                          "nfurzeri_homolog_ensembl_gene",
                          "nfurzeri_homolog_associated_gene_name",
                          "nfurzeri_homolog_orthology_type",
                          "nfurzeri_homolog_orthology_confidence")
  HomologFilters <- c("with_nfurzeri_homolog", "go_parent_term")

  # Direct annotations of the killifish genes.
  KillifishTable <- QueryBioMart("nfurzeri_gene_ensembl", GOAttributes,
                                 "go_parent_term", GO.CSV)
  KillifishGOList <- CompileGOList(KillifishTable)

  for (SpeciesDatasetName in SpeciesDatasetNameList) {
    # GO annotations of this species' genes that have a killifish homolog.
    ThisSpeciesTable <- QueryBioMart(SpeciesDatasetName, GOAttributes,
                                     HomologFilters, list(TRUE, GO.CSV))
    ThisSpeciesGOList <- CompileGOList(ThisSpeciesTable)

    # Homology details for the same set of genes.
    ThisSpeciesHomologyTable <- QueryBioMart(SpeciesDatasetName, HomologyAttributes,
                                             HomologFilters, list(TRUE, GO.CSV))

    KillifishGOList <- TranslateGOList.Nfurzeri(
      ThisSpeciesHomologyTable, ThisSpeciesGOList, KillifishGOList,
      AllowedOrthologyConfidenceCodes = AllowedOrthologyConfidenceCodes,
      SafeOrthologyTypes = SafeOrthologyTypes
    )
  }
  return(KillifishGOList)
}
# Collapse a long-format gene/GO table (one row per gene-term pair) into a
# named list: one element per gene ID, holding that gene's distinct GO IDs in
# order of first appearance.
#
# Arguments:
#   BioMartExportGOTable  Table with at least the two columns named below.
#   ExistingGOList        Named list to extend; entries for genes already
#                         present are merged (and de-duplicated) with the new
#                         terms, new genes are appended after the existing ones.
#   EnsemblIDColumnName   Name of the gene ID column.
#   GOColumnName          Name of the GO ID column.
#
# Returns:
#   The updated list. A table with zero rows returns ExistingGOList unchanged.
#   Rows whose gene ID is NA are skipped.
CompileGOList <- function(BioMartExportGOTable,
                          ExistingGOList = list(),
                          EnsemblIDColumnName = "ensembl_gene_id",
                          GOColumnName = "go_id") {
  if (nrow(BioMartExportGOTable) == 0) {
    return(ExistingGOList)
  }

  # Coerce to character up front: a factor used as a `[[` subscript is
  # interpreted by its integer code rather than its label, which would file
  # terms under the wrong list position.
  GeneIDs <- as.character(BioMartExportGOTable[, EnsemblIDColumnName])
  GOTerms <- as.character(BioMartExportGOTable[, GOColumnName])

  # Explicit levels keep the genes in order of first appearance (split() would
  # otherwise sort them alphabetically).
  TermsByGene <- split(GOTerms, factor(GeneIDs, levels = unique(GeneIDs)))

  for (k in seq_along(TermsByGene)) {
    GeneID <- names(TermsByGene)[k]
    # `[[` yields NULL for a gene not yet in the list, so this one expression
    # covers both "merge into existing entry" and "create new entry".
    ExistingGOList[[GeneID]] <- unique(c(ExistingGOList[[GeneID]], TermsByGene[[k]]))
  }
  ExistingGOList
}
# Copy GO IDs from another species' genes onto their killifish homologs.
#
# For every gene named in OriginalGOList, the rows of HomologyTable for that
# gene are looked up; rows whose orthology type and confidence are both in the
# accepted sets contribute the gene's GO IDs to the homolog named in that row.
#
# Arguments:
#   HomologyTable                   Table with the four columns named below.
#   OriginalGOList                  Named list (source gene ID -> GO IDs).
#   NfurzeriGOList                  Named list (killifish gene ID -> GO IDs) to
#                                   extend.
#   OriginalEnsemblIDColumnName     Column holding the source gene ID.
#   OrthologyTypeColumnName         Column holding the orthology type.
#   OrthologyConfidenceColumnName   Column holding the orthology confidence.
#   AllowedOrthologyConfidenceCodes Accepted values of the confidence column.
#   SafeOrthologyTypes              Accepted values of the type column.
#   NfurzeriEnsemblIDColumnName     Column holding the killifish homolog ID.
#
# Returns:
#   NfurzeriGOList with the transferred terms added. A homolog that already has
#   an entry gets the union (existing terms first); a new homolog receives the
#   source gene's terms as they are. Rows with a missing source ID or a missing
#   or empty homolog ID are ignored.
TranslateGOList.Nfurzeri <- function(HomologyTable,
                                     OriginalGOList, NfurzeriGOList = list(),
                                     OriginalEnsemblIDColumnName = "ensembl_gene_id",
                                     OrthologyTypeColumnName = "nfurzeri_homolog_orthology_type",
                                     OrthologyConfidenceColumnName = "nfurzeri_homolog_orthology_confidence",
                                     AllowedOrthologyConfidenceCodes,
                                     SafeOrthologyTypes,
                                     NfurzeriEnsemblIDColumnName = "nfurzeri_homolog_ensembl_gene") {
  SourceIDs <- as.character(HomologyTable[, OriginalEnsemblIDColumnName])
  TargetIDs <- as.character(HomologyTable[, NfurzeriEnsemblIDColumnName])

  # The type/confidence screen does not depend on the gene being processed, so
  # it is evaluated once for the whole table. NA and empty IDs are dropped here
  # because they cannot serve as list keys.
  IsUsable <- (HomologyTable[, OrthologyTypeColumnName] %in% SafeOrthologyTypes) &
    (HomologyTable[, OrthologyConfidenceColumnName] %in% AllowedOrthologyConfidenceCodes) &
    !is.na(SourceIDs) & !is.na(TargetIDs) & nzchar(TargetIDs)
  SourceIDs <- SourceIDs[IsUsable]
  TargetIDs <- TargetIDs[IsUsable]

  # Iterating over the names does nothing for an empty list, unlike
  # `1 : length(OriginalGOList)`.
  for (OriginalEnsemblID in names(OriginalGOList)) {
    # %in% (rather than ==) can never yield NA, so the subset stays clean.
    for (NfurzeriID in TargetIDs[SourceIDs %in% OriginalEnsemblID]) {
      if (NfurzeriID %in% names(NfurzeriGOList)) {
        NfurzeriGOList[[NfurzeriID]] <-
          unique(c(NfurzeriGOList[[NfurzeriID]], OriginalGOList[[OriginalEnsemblID]]))
      } else {
        NfurzeriGOList[[NfurzeriID]] <- OriginalGOList[[OriginalEnsemblID]]
      }
    }
  }
  NfurzeriGOList
}