forked from emrahkirdok/ybva
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathle-nbayes.R
More file actions
48 lines (38 loc) · 1.2 KB
/
le-nbayes.R
File metadata and controls
48 lines (38 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# seqinr supplies read.fasta(); kmer supplies kcount(), which train() and
# predict() below rely on.
library(seqinr)
library(kmer)
# Load the example sequences. The path is relative, so it is resolved against
# the current working directory.
dna <- read.fasta(file = "03-bilgisayarli-dusunme/data/example.fna")
#' Build a per-class k-mer probability table.
#'
#' For every sequence in `dna`, counts its k-mers with `kcount()`, adds a
#' pseudocount of 1 to every k-mer (so no k-mer ends up with probability 0)
#' and normalises the counts so that each row sums to 1.
#'
#' @param dna A non-empty named list of sequences, each in a form accepted by
#'   `kcount()`. The names become the class labels (row names of the result).
#' @param k k-mer length, passed straight to `kcount()`.
#' @return A numeric matrix with one row per element of `dna` and one column
#'   per k-mer reported by `kcount()` for the first sequence.
train <- function(dna, k) {
  Y <- names(dna)
  # Fail early with a clear message: without names there are no class labels,
  # and an empty list has no first sequence to derive the columns from.
  if (length(dna) == 0L || is.null(Y)) {
    stop("`dna` must be a non-empty named list of sequences", call. = FALSE)
  }

  # The k-mer column space is taken from the first sequence only.
  X <- colnames(kcount(x = dna[[1]], k = k))
  mat <- matrix(
    data = 0, nrow = length(Y), ncol = length(X),
    dimnames = list(Y, X)
  )

  for (i in seq_along(Y)) {
    # +1 is the pseudocount; dividing by the total turns counts into
    # probabilities.
    kounts <- kcount(x = dna[[i]], k = k) + 1
    mat[i, colnames(kounts)] <- kounts / sum(kounts)
  }
  mat
}
# Fit the k-mer probability table on every loaded sequence, using 8-mers.
mat <- train(dna = dna, k = 8)
# Example query: elements 4 through 45 of the first sequence.
query <- dna[[1]][seq(from = 4, to = 45)]
#' Score a query sequence against every class of a trained k-mer table.
#'
#' The score of a class is the sum of `-log(p)` over the distinct k-mers that
#' occur in the query, where `p` is that class's probability for the k-mer in
#' `mat`. Each distinct k-mer contributes once, regardless of how many times
#' it occurs in the query. Lower scores mean the class assigns a higher
#' probability to the observed k-mers.
#'
#' @param query A sequence in a form accepted by `kcount()`.
#' @param mat A numeric matrix of k-mer probabilities with class labels as row
#'   names and k-mers as column names, such as the one returned by `train()`.
#' @param k k-mer length, passed straight to `kcount()`; it should match the
#'   value used to build `mat`.
#' @return A data frame with one row per class and two columns: `Class`
#'   (character) and `-Log Likelihood` (numeric).
predict <- function(query, mat, k) {
  Y <- rownames(mat)
  if (length(Y) == 0L) {
    stop("`mat` must have at least one row and carry row names", call. = FALSE)
  }

  # Count once, then keep only the k-mers actually observed in the query.
  query_counts <- kcount(x = query, k = k)
  kmer_names <- colnames(query_counts)[colSums(query_counts) > 0]

  unknown <- setdiff(kmer_names, colnames(mat))
  if (length(unknown) > 0L) {
    stop(
      "query contains k-mers that are not columns of `mat`: ",
      paste(utils::head(unknown, 5L), collapse = ", "),
      call. = FALSE
    )
  }
  if (length(kmer_names) == 0L) {
    warning("query contains no k-mers of length `k`; all scores are 0",
            call. = FALSE)
  }

  # Accumulate -log(p) across the observed k-mers for every class at once.
  # drop = FALSE keeps a matrix even when only one k-mer was observed.
  scores <- -rowSums(log(mat[, kmer_names, drop = FALSE]))

  # Build the result with typed columns directly, so the numeric score never
  # round-trips through character.
  data.frame(
    Class = Y,
    `-Log Likelihood` = unname(scores),
    check.names = FALSE,
    stringsAsFactors = FALSE,
    row.names = NULL
  )
}
# End-to-end example: train on all sequences with 8-mers, then score the
# fourth sequence against every class.
k <- 8
mat <- train(dna, k = k)
h <- predict(query = dna[[4]], mat = mat, k = k)
# The score column is a negative log-likelihood, so the best-matching class is
# the row with the smallest value, not the largest.
h[which.min(h$`-Log Likelihood`), ]
h