QDA by Projection Real Data
$p=9$ , $n=699$ . Repeat $m=100$ times for average test error.
seed <- 2020
m <- 300
per <- c(0.4 , 0.5 , 0.6 , 0.7 , 0.8 )
bc <- read.csv(" real_data/breast-cancer-wisconsin.data" , header = FALSE )
y <- bc [, 11 ]
id0 <- which(y == 2 )
id1 <- which(y == 4 )
y [id0 ] <- 0
y [id1 ] <- 1
x <- data.matrix(bc [, 2 : 10 ])
pct. LDA QDA RDA DSDA DAP QDAP
0.4 4.78 (0.05) 5.29 (0.06) 4.50 (0.05) 5.14 (0.05) 4.61 (0.06) 3.52 (0.03)
0.5 4.80 (0.05) 5.19 (0.06) 4.42 (0.05) 5.02 (0.06) 4.48 (0.06) 3.40 (0.04)
0.6 4.62 (0.06) 5.02 (0.07) 4.23 (0.06) 4.87 (0.06) 4.24 (0.06) 3.30 (0.04)
0.7 4.72 (0.07) 5.13 (0.08) 4.27 (0.07) 4.95 (0.08) 4.23 (0.07) 3.33 (0.06)
0.8 4.66 (0.09) 5.09 (0.10) 4.06 (0.08) 4.70 (0.09) 4.24 (0.09) 3.29 (0.08)
$p=36$ , $n=87$ . Repeat $m=100$ times for average test error.
seed <- 2020
m <- 300
per <- c(0.3 , 0.4 , 0.5 , 0.6 , 0.7 , 0.8 )
fm <- read.table(" real_data/Meter A.data" )
y <- fm [, 37 ]
id0 <- which(y == 2 )
id1 <- which(y == 1 )
y [id0 ] <- 0
y [id1 ] <- 1
x <- data.matrix(fm [, 1 : 36 ])
pct. LDA RDA DSDA DAP QDAP
0.3 25.87 (0.55) 37.20 (0.45) 25.21 (0.69) 19.67 (0.38) 10.64 (0.36)
0.4 12.58 (0.50) 35.37 (0.44) 18.76 (0.65) 17.33 (0.38) 4.10 (0.22)
0.5 3.39 (0.22) 33.50 (0.40) 4.33 (0.33) 15.34 (0.38) 1.40 (0.11)
0.6 1.58 (0.11) 34.05 (0.38) 2.94 (0.26) 15.52 (0.41) 0.89 (0.08)
0.7 0.81 (0.10) 33.64 (0.40) 2.02 (0.20) 13.83 (0.40) 0.64 (0.08)
0.8 0.43 (0.09) 34.08 (0.53) 1.22 (0.16) 12.73 (0.45) 0.69 (0.11)
$p=13$ , $n=303$ . Repeat $m=300$ times for average test error.
seed <- 2020
m <- 300
per <- c(0.4 , 0.5 , 0.6 , 0.7 , 0.8 )
hd <- read.csv(" real_data/heart.csv" )
y <- hd [, 14 ]
id0 <- which(y == 0 )
id1 <- which(y == 1 )
y [id0 ] <- 0
y [id1 ] <- 1
x <- data.matrix(hd [, 1 : 13 ])
pct. LDA QDA RDA DSDA DAP QDAP
0.4 18.82 (0.14) 22.23 (0.15) 19.15 (0.17) 19.28 (0.15) 20.10 (0.20) 18.55 (0.15)
0.5 17.93 (0.15) 21.67 (0.15) 17.91 (0.17) 18.39 (0.16) 18.86 (0.18) 17.75 (0.15)
0.6 17.81 (0.17) 20.86 (0.18) 17.56 (0.18) 18.00 (0.17) 18.43 (0.19) 17.48 (0.17)
0.7 17.22 (0.18) 20.26 (0.21) 16.90 (0.19) 17.33 (0.17) 17.42 (0.18) 16.93 (0.18)
0.8 17.54 (0.24) 19.98 (0.26) 17.02 (0.24) 17.66 (0.23) 17.46 (0.26) 17.14 (0.24)
seed <- 2020
m <- 300
per <- c(0.4 , 0.5 , 0.6 , 0.7 , 0.8 )
sd <- read.table(" ./real_data/segment.dat" )
sd <- sd [which(sd [, 20 ] %in% c(1 , 4 )), ]
y <- sd [, 20 ]
id0 <- which(y == 4 )
id1 <- which(y == 1 )
y [id0 ] <- 0
y [id1 ] <- 1
x <- data.matrix(sd [, c(2 , 6 : 19 )])
pct. LDA RDA DSDA DAP QDAP
0.4 0.73 (0.02) 0.86 (0.03) 0.92 (0.03) 1.70 (0.04) 0.73 (0.02)
0.5 0.74 (0.02) 0.79 (0.03) 0.90 (0.03) 1.68 (0.04) 0.73 (0.02)
0.6 0.72 (0.02) 0.78 (0.03) 0.84 (0.03) 1.64 (0.04) 0.69 (0.02)
0.7 0.73 (0.03) 0.70 (0.04) 0.86 (0.03) 1.57 (0.05) 0.71 (0.03)
0.8 0.76 (0.04) 0.67 (0.04) 0.89 (0.04) 1.59 (0.06) 0.73 (0.03)
seed <- 2020
m <- 300
per <- c(0.4 , 0.5 , 0.6 , 0.7 , 0.8 )
si <- read.table(" ./real_data/sat.trn" )
si <- si [which(si [, 37 ] %in% c(1 , 3 )), ]
y <- si [, 37 ]
id0 <- which(y == 3 )
id1 <- which(y == 1 )
y [id0 ] <- 0
y [id1 ] <- 1
x <- data.matrix(si [, 1 : 36 ])
pct. LDA QDA RDA DSDA DAP QDAP
0.4 1.40 (0.01) 1.94 (0.02) 1.39 (0.01) 1.43 (0.01) 1.61 (0.02) 1.37 (0.01)
0.5 1.37 (0.02) 1.90 (0.02) 1.37 (0.02) 1.41 (0.02) 1.56 (0.02) 1.32 (0.02)
0.6 1.37 (0.02) 1.79 (0.03) 1.38 (0.02) 1.39 (0.02) 1.54 (0.02) 1.32 (0.02)
0.7 1.33 (0.02) 1.78 (0.03) 1.36 (0.02) 1.37 (0.02) 1.59 (0.03) 1.29 (0.02)
0.8 1.31 (0.03) 1.70 (0.03) 1.34 (0.03) 1.36 (0.03) 1.55 (0.04) 1.27 (0.03)
set.seed(seed )
id <- datasplit(id0 = id0 , id1 = id1 ,
m = m , per = per )
len_per <- length(per )
pred_err <- vector(" list" , len_per )
if (parallel == TRUE ) {
Sys.setenv(OMP_NUM_THREADS = 1 )
for (i in 1 : len_per ) {
pred_err [[i ]] <- foreach(j = 1 : m , .combine = rbind ,
.options.RNG = seed ) %dorng %
data_analysis(x = x [id [[i ]][[j ]], ], y = y [id [[i ]][[j ]]],
xnew = x [- id [[i ]][[j ]], ], ynew = y [- id [[i ]][[j ]]],
qdap = qdap , lda = lda , qda = qda , dsda = dsda ,
sqda = sqda , rda = rda )
}
Sys.setenv(OMP_NUM_THREADS = 4 )
} else {
for (i in 1 : len_per ) {
for (j in 1 : m ) {
pred_err [[i ]] <-
rbind(pred_err [[i ]],
data_analysis(x = x [id [[i ]][[j ]], ], y = y [id [[i ]][[j ]]],
xnew = x [- id [[i ]][[j ]], ], ynew = y [- id [[i ]][[j ]]],
qdap = qdap , lda = lda , qda = qda , dsda = dsda ,
sqda = sqda , rda = rda ))
}
}
}
obj_name <- paste0(" pred_err_" , name )
assign(obj_name , pred_err )
if (test == FALSE )
resave(list = obj_name , file = " out/real_data_summary.RData" )
obj_name <- paste0(" pred_err_" , name )
out <- paste0(" out/" , name , " .pdf" )
len_per <- length(per )
data_summarized <- NULL
for (i in 1 : len_per ) {
data_summarized <-
rbind(data_summarized ,
data.frame (summary_se(get(obj_name )[[i ]]), pct. = per [i ]))
}
pdf(out )
pd <- position_dodge(0.01 )
plot <- ggplot(data_summarized , aes(x = pct. , y = prediction.error ,
colour = method )) +
scale_x_continuous(breaks = per ) +
geom_errorbar(aes(ymin = prediction.error - ci.95 ,
ymax = prediction.error + ci.95 ),
width = .02 , position = pd ) +
geom_line(position = pd ) +
geom_point(position = pd )
print(plot )
dev.off()
data_summarized %> %
dplyr :: select(- ci.95 ) %> %
mutate(prediction.error
= format(round(prediction.error * 100 , 2 ), nsmall = 2 )) %> %
mutate(standard.error = paste0(" (" , format(round(standard.error * 100 , 2 ),
nsmall = 2 ), " )" )) %> %
unite(col = prediction.error , prediction.error , standard.error , sep = " " ) %> %
spread(key = method , value = prediction.error )