# xgb_monet.R
# Forked from CSB-IG/MoNet-signatures.
# Dependencies ----
library(caret) # Machine Learning Library (train, trainControl)
library(dplyr) # Data Wrangling (%>%, select)
library(ggplot2) # Data visualization
library(xgboost) # XGBoost algorithm and utils
library(Ckmeans.1d.dp) # Feature Importance

# Fix the RNG seed so that repeated runs draw the same random numbers.
set.seed(42)
# Training data ----
# `M.t` and `df.umap.mclust` are objects created by umap_gmm.R; they must
# already exist in the session before this script runs.
train <- cbind(M.t, df.umap.mclust$classification) %>%
  as.data.frame()

# cbind() appends the classification as the last column, so address it
# relative to the table width instead of through a hard-coded index.
colnames(train)[ncol(train)] <- "Classes"

## Train matrix: every column except the trailing class column
train_Dmatrix <- train %>%
  select(-last_col()) %>%
  as.matrix() %>%
  xgb.DMatrix()

# NOTE(review): if these labels are numeric, caret::train() fits a regression
# rather than a classifier; wrap in factor() if classification is intended --
# TODO confirm.
targets <- train[["Classes"]]
# Cross-validation ----
# 5-fold CV; per-iteration logging is off and the training data is not
# stored inside the fitted object. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
xgb_tr_control <- trainControl(
  method = "cv",
  number = 5,
  allowParallel = TRUE,
  verboseIter = FALSE,
  returnData = FALSE
)
# Tuning grid ----
# Every nrounds value from 100 to 200 is crossed with three tree depths; the
# remaining parameters are each held at a single value.
xgb_grid <- expand.grid(
  nrounds = 100:200,
  max_depth = c(6, 15, 20),
  colsample_bytree = 1,
  eta = 0.02,
  gamma = 0,
  min_child_weight = 1,
  subsample = 1
)
### XGBoost training ###
# Tune an "xgbTree" model over the candidates in xgb_grid, resampled as
# configured in xgb_tr_control. nthread is not a named argument of train();
# presumably it is forwarded to the underlying xgboost fit -- verify.
xgb_model <- caret::train(
  x = train_Dmatrix,
  y = targets,
  method = "xgbTree",
  tuneGrid = xgb_grid,
  trControl = xgb_tr_control,
  nthread = 4
)
# Uncomment to inspect the selected hyper-parameters:
# xgb_model$bestTune
### Feature importance calculated by XGBoost ###
# Feature names are the column names of `train` minus its last column (the
# class labels). Indexing the name vector avoids a hard-coded column number
# and avoids copying the whole data frame just to read its names.
xgb_imp <- xgb.importance(
  feature_names = colnames(train)[-ncol(train)],
  model = xgb_model$finalModel
)
## Write the supplementary file
# Saves the importance table to the working directory without row names.
write.csv(xgb_imp, "feature_importance.csv", row.names = FALSE)
#### Visualize the feature importance ###
# Plot the six top-ranked features from xgb_imp.
xgb.ggplot.importance(
  xgb_imp,
  rel_to_first = FALSE,
  top_n = 6
)