-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautoH2Oallmodel.R
111 lines (86 loc) · 3.11 KB
/
autoH2Oallmodel.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# autoH2O: H2O AutoML run for all auto models
# Author notes / open items:
#   - worrying: conversion to an H2O data frame drops row names
#   - put the winning models as hyperparams of the model
#   - fail printout
#
# Run H2O AutoML once per time budget and hand the hold-out predictions and
# variable importances to the project's reporting helpers.
#
# Names read from the calling environment (all defined outside this script):
#   cpout.folder, tuneLength, gens.names, gend.data, training, testing,
#   cv.iters, seed.var, predictNDCG, df.forNDCG, and the helpers CrashNRep(),
#   printPredMets(), varimprint(), varimperm(), failfail().
# Variable names assigned below are kept unchanged on purpose: the helpers may
# read some of them from the global environment (e.g. `allmodel`, `when`).
setwd(cpout.folder)
library(h2o)
fail.try <- FALSE
for (itr in c(.1, 1)) { # ,30
  # Reset per budget so a crash in one budget is not attributed to the next.
  fail.try <- FALSE
  try({
    # Not read in this script; presumably consumed by the reporting helpers
    # for timing -- TODO confirm.
    when <- proc.time()
    # For binary classification, response should be a factor
    #train[,y] <- as.factor(train[,y])
    #test[,y] <- as.factor(test[,y])
    maxrun <- itr * tuneLength
    allmodel <- paste("h2oAutoml", as.character(maxrun), sep = " ")
    # Breadcrumbs: record what is about to run before starting the fit.
    write.table(allmodel, file = "last algorithm tried.csv",
                quote = FALSE, row.names = FALSE, col.names = FALSE)
    write.table(gens.names[gend.data], file = "last task tried.csv",
                quote = FALSE, row.names = FALSE, col.names = FALSE)
    if (!CrashNRep(allmodel)) {
      # Stays TRUE until the core fit/predict/report sequence has completed.
      fail.try <- TRUE
      h2o.init()
      train <- as.h2o(training)
      test <- as.h2o(testing)
      # Identify predictors and response
      y <- "V1"
      x <- setdiff(names(train), y)
      # maxrun is scaled by 60 here, i.e. treated as minutes.
      aml <- h2o.automl(x = x, y = y,
                        max_runtime_secs = maxrun * 60,
                        nfolds = cv.iters, seed = seed.var,
                        training_frame = train)
      # max_runtime_secs = 0,
      # max_models = 20,max_models = 200,
      # )
      # View the AutoML Leaderboard, keep_cross_validation_models=FALSE
      lb <- aml@leaderboard
      lbdf <- as.data.frame(lb)
      print(lbdf)
      # Score the hold-out set once with the leader model (the original scored
      # it twice and discarded the first result).
      pred <- h2o.predict(aml@leader, test)
      preddf <- as.data.frame(pred) #452434
      # as.h2o() drops row names, so restore them from the source data frame.
      row.names(preddf) <- row.names(testing)
      print(preddf)
      RANKSforNDCG <- NULL
      if (predictNDCG) {
        NDCGtest <- as.h2o(df.forNDCG)
        RANKSforNDCGdf <- as.data.frame(h2o.predict(aml@leader, NDCGtest))
        # Keep only the first prediction column as a plain vector.
        RANKSforNDCG <- RANKSforNDCGdf[, 1]
      }
      # Leaderboard column order is not stable across h2o versions, so look
      # the metric up by name; fall back to the original positional lookup
      # when no `rmse` column exists.
      if ("rmse" %in% names(lbdf)) {
        overRMSE <- lbdf[1, "rmse"]
      } else {
        overRMSE <- lbdf[1, 3]
      }
      printPredMets(predicted.outcomes = preddf, overRMSE = overRMSE,
                    hypercount = "full", libpack = "autoH2O",
                    RANKSforNDCG = RANKSforNDCG)
      # Core work is done; the importance reports below are best-effort and
      # must not mark the whole run as failed.
      fail.try <- FALSE

      # h2o.varimp() has no table for some leaders (e.g. stacked ensembles),
      # and its column names differ from the legacy names/coefficients layout
      # this script was written against. Accept either layout and skip the
      # report when nothing usable is available.
      varimportant <- tryCatch(
        as.data.frame(h2o.varimp(aml@leader)),
        error = function(e) {
          message("h2o.varimp failed: ", conditionMessage(e))
          NULL
        }
      )
      print(varimportant)
      nameCol <- intersect(c("names", "variable"), names(varimportant))
      imporCol <- intersect(c("coefficients", "relative_importance"),
                            names(varimportant))
      if (length(nameCol) > 0 && length(imporCol) > 0) {
        colNms <- as.vector(varimportant[[nameCol[1]]])
        colImpor <- signif(varimportant[[imporCol[1]]], digits = 3)
        try({
          varimprint(metpack = "h2oa", colNms = colNms, colImpor = colImpor)
        })
      }

      try({
        # Adapter passed to varimperm(): converts a plain data frame to an
        # H2O frame, predicts, and returns the result as a data frame.
        custom_predict <- function(object, newdata) {
          test <- as.h2o(newdata)
          pred <- h2o.predict(object, test)
          preddf <- as.data.frame(pred)
          return(preddf)
        }
        # Column 1 is passed as the response, the remaining columns as
        # predictors.
        varimperm(custom_predict = custom_predict, modeltp = aml,
                  X = testing[, -1], Y = testing[, 1],
                  metpack = "h2oa_hold", n_sample = 1000)
        varimperm(custom_predict = custom_predict, modeltp = aml,
                  X = training[, -1], Y = training[, 1],
                  metpack = "h2oa_train", n_sample = 1000)
      })
    }
  })
  # The failure check lives outside the try() so it is reachable when the
  # block above aborts; the original check could never fire because the flag
  # was cleared on the line just before it.
  if (fail.try) {
    try({failfail()})
    fail.try <- FALSE
  }
}
try({h2o.shutdown(prompt = FALSE)})