|
1 | 1 | # test_that("rforce works", { |
2 | | - # Developing help function |
3 | | - .libPaths(rev(.libPaths())) |
4 | | - Sys.setenv(PATH = paste0(Sys.getenv("PATH"), ":/hpc/apps/pandoc/2.11.4/bin")) |
5 | | - library(devtools) |
6 | | - document() |
7 | | - library(dplyr) |
8 | | - # library(sjmisc) |
9 | | - library(Rforce) |
10 | | - |
11 | | - data <- readRDS(file = "/home/yu89975/r-dev/Rforce/data/test_data.rds") %>% filter(Id < 1000) |
12 | | - units_of_cpius <- diff(c(0, quantile(data$X, 1 / 10 * 1:10))) |
13 | | - # table(data$Status) |
14 | | - |
15 | | - data_to_dummy <- data %>% |
16 | | - dplyr::select(-c(X, Status, Id)) |
17 | | - |
18 | | - lst <- sapply(data_to_dummy, function(x) { |
19 | | - if (is.factor(x)) { |
20 | | - sjmisc::to_dummy(x)[, -1] |
21 | | - } else { |
22 | | - x |
23 | | - } |
24 | | - }) |
25 | | - |
26 | | - formula <- as.formula( |
27 | | - paste( |
28 | | - "Surv(Id, X, Status) ~", |
29 | | - paste(colnames(data %>% dplyr::select(-c(X, Status, Id))), collapse = " + ") |
30 | | - ) |
| 2 | +# Developing help function |
| 3 | +.libPaths(rev(.libPaths())) |
| 4 | +Sys.setenv(PATH = paste0(Sys.getenv("PATH"), ":/hpc/apps/pandoc/2.11.4/bin")) |
| 5 | +library(devtools) |
| 6 | +document() |
| 7 | +library(dplyr) |
| 8 | +# library(sjmisc) |
| 9 | +library(Rforce) |
| 10 | + |
| 11 | +data <- readRDS(file = "/home/yu89975/r-dev/Rforce/data/test_data.rds") %>% |
| 12 | + filter(Id < 1000) |
| 13 | +units_of_cpius <- diff(c(0, quantile(data$X, 1 / 10 * 1:10))) |
| 14 | +# table(data$Status) |
| 15 | + |
| 16 | +data_to_dummy <- data %>% |
| 17 | + dplyr::select(-c(X, Status, Id)) |
| 18 | + |
| 19 | +lst <- sapply(data_to_dummy, function(x) { |
| 20 | + if (is.factor(x)) { |
| 21 | + sjmisc::to_dummy(x)[, -1] |
| 22 | + } else { |
| 23 | + x |
| 24 | + } |
| 25 | +}) |
| 26 | + |
| 27 | +formula <- as.formula( |
| 28 | + paste( |
| 29 | + "Surv(Id, X, Status) ~", |
| 30 | + paste(colnames(data %>% dplyr::select(-c(X, Status, Id))), collapse = " + ") |
31 | 31 | ) |
32 | | - |
33 | | - data_to_convert <- cbind.data.frame( |
34 | | - do.call("cbind.data.frame", lst), |
35 | | - data %>% select(c("Id", "X", "Status")) |
36 | | - ) |
37 | | - |
38 | | - # variable_Ids <- colnames(do.call("cbind.data.frame", lst)) |
39 | | - |
40 | | - # variable_Ids <- gsub("\\.x_\\d+", "", variable_Ids[]) |
41 | | - |
42 | | - # unique_vars <- unique(variable_Ids) |
43 | | - |
44 | | - # variable_Ids <- match(variable_Ids, unique_vars) - 1 |
45 | | - |
46 | | - lst_cpiu_wide <- patients_to_cpius( |
47 | | - data_to_convert = data, |
48 | | - units_of_cpiu = units_of_cpius, |
49 | | - weights_by_status = c(0, 1, 1, 1, 1), |
50 | | - pseudo_risk = TRUE, |
51 | | - wide_format = TRUE |
52 | | - ) |
53 | | - |
54 | | - object <- lst_cpiu_wide |
55 | | - temp <- cpius_to_dummy(lst_cpiu_wide) |
56 | | - |
57 | | - design_matrix_Y <- as.matrix(lst_cpiu_wide$designMatrix_Y) |
58 | | - auxiliary_features <- as.matrix(lst_cpiu_wide$auxiliaryFeatures) |
59 | | - |
60 | | - # design_matrix_Y[is.na(design_matrix_Y)] <- 999999.0 |
61 | | - |
62 | | - temp_method1 <- Rforce( |
63 | | - data = data, |
64 | | - formula = formula, |
65 | | - n_intervals = 10, |
66 | | - n_trees = 10, |
67 | | - mtry = 6, |
68 | | - n_splits = 2, |
69 | | - max_depth = 5, |
70 | | - min_node_size = 10, |
71 | | - min_gain = 0, |
72 | | - split_rule = "Rforce-QLR" |
73 | | - ) |
74 | | - |
75 | | - printTree(temp_method1, treeIndex = 1) |
76 | | - |
77 | | - str(temp_method1) |
78 | | - temp <- Rforce( |
79 | | - design_matrix_Y = design_matrix_Y, |
80 | | - auxiliary_features = auxiliary_features, |
81 | | - variable_Ids = variable_Ids, |
82 | | - units_of_cpius = units_of_cpius, |
83 | | - split_rule = "Rforce-QLR", |
84 | | - n_trees = 10, |
85 | | - mtry = 3, |
86 | | - n_splits = 2, |
87 | | - seed = 926 |
88 | | - ) |
89 | | - |
90 | | - lst_cpiu_long <- patients_to_cpius( |
91 | | - data_to_convert = data_to_convert, |
92 | | - units_of_cpiu = units_of_cpius, |
93 | | - weights_by_status = c(0, 1, 1, 1, 1), |
94 | | - pseudo_risk = FALSE, |
95 | | - wide_format = FALSE |
96 | | - ) |
97 | | - |
98 | | - temp_rfslam <- Rforce( |
99 | | - design_matrix_Y = as.matrix(lst_cpiu_long$designMatrix_Y), |
100 | | - auxiliary_features = as.matrix(lst_cpiu_long$auxiliaryFeatures), |
101 | | - variable_Ids = variable_Ids, |
102 | | - units_of_cpius = units_of_cpius, |
103 | | - split_rule = "RF-SLAM", |
104 | | - n_trees = 10, |
105 | | - mtry = 3, |
106 | | - n_splits = 2, |
107 | | - seed = 926 |
108 | | - ) |
109 | | - |
110 | | - predict(temp, design_matrix_Y[1:2, ]) |
111 | | - str(temp) |
112 | | - saveRforce(temp, "./output/") |
113 | | -# |
114 | | - temp2 <- loadRforce("./output/") |
115 | | - |
116 | | - saveRforce(temp2, "./output/") |
117 | | - temp3 <- loadRforce("./output/") |
118 | | - |
119 | | - predict(temp3, design_matrix_Y[1:2, ]) |
| 32 | +) |
| 33 | + |
| 34 | +data_to_convert <- cbind.data.frame( |
| 35 | + do.call("cbind.data.frame", lst), |
| 36 | + data %>% select(c("Id", "X", "Status")) |
| 37 | +) |
| 38 | + |
| 39 | +# variable_Ids <- colnames(do.call("cbind.data.frame", lst)) |
| 40 | + |
| 41 | +# variable_Ids <- gsub("\\.x_\\d+", "", variable_Ids[]) |
| 42 | + |
| 43 | +# unique_vars <- unique(variable_Ids) |
| 44 | + |
| 45 | +# variable_Ids <- match(variable_Ids, unique_vars) - 1 |
| 46 | + |
| 47 | +lst_cpiu_wide <- patients_to_cpius( |
| 48 | + data_to_convert = data, |
| 49 | + units_of_cpiu = units_of_cpius, |
| 50 | + weights_by_status = c(0, 1, 1, 1, 1), |
| 51 | + pseudo_risk = TRUE, |
| 52 | + wide_format = TRUE |
| 53 | +) |
| 54 | + |
| 55 | +object <- lst_cpiu_wide |
| 56 | +temp <- cpius_to_dummy(lst_cpiu_wide) |
| 57 | + |
| 58 | +design_matrix_Y <- as.matrix(lst_cpiu_wide$designMatrix_Y) |
| 59 | +auxiliary_features <- as.matrix(lst_cpiu_wide$auxiliaryFeatures) |
| 60 | + |
| 61 | +# design_matrix_Y[is.na(design_matrix_Y)] <- 999999.0 |
| 62 | + |
| 63 | +temp_method1 <- Rforce( |
| 64 | + data = data, |
| 65 | + formula = formula, |
| 66 | + n_intervals = 10, |
| 67 | + n_trees = 10, |
| 68 | + mtry = 6, |
| 69 | + n_splits = 2, |
| 70 | + max_depth = 5, |
| 71 | + min_node_size = 10, |
| 72 | + min_gain = 0, |
| 73 | + split_rule = "Rforce-QLR" |
| 74 | +) |
| 75 | + |
| 76 | +printTree(temp_method1, treeIndex = 1) |
| 77 | + |
| 78 | +str(temp_method1) |
| 79 | +temp <- Rforce( |
| 80 | + design_matrix_Y = design_matrix_Y, |
| 81 | + auxiliary_features = auxiliary_features, |
| 82 | + variable_Ids = variable_Ids, |
| 83 | + units_of_cpius = units_of_cpius, |
| 84 | + split_rule = "Rforce-QLR", |
| 85 | + n_trees = 10, |
| 86 | + mtry = 3, |
| 87 | + n_splits = 2, |
| 88 | + seed = 926 |
| 89 | +) |
| 90 | + |
| 91 | +lst_cpiu_long <- patients_to_cpius( |
| 92 | + data_to_convert = data_to_convert, |
| 93 | + units_of_cpiu = units_of_cpius, |
| 94 | + weights_by_status = c(0, 1, 1, 1, 1), |
| 95 | + pseudo_risk = FALSE, |
| 96 | + wide_format = FALSE |
| 97 | +) |
| 98 | + |
| 99 | +temp_rfslam <- Rforce( |
| 100 | + design_matrix_Y = as.matrix(lst_cpiu_long$designMatrix_Y), |
| 101 | + auxiliary_features = as.matrix(lst_cpiu_long$auxiliaryFeatures), |
| 102 | + variable_Ids = variable_Ids, |
| 103 | + units_of_cpius = units_of_cpius, |
| 104 | + split_rule = "RF-SLAM", |
| 105 | + n_trees = 10, |
| 106 | + mtry = 3, |
| 107 | + n_splits = 2, |
| 108 | + seed = 926 |
| 109 | +) |
| 110 | + |
| 111 | +predict(temp, design_matrix_Y[1:2, ]) |
| 112 | +str(temp) |
| 113 | +saveRforce(temp, "./output/") |
| 114 | +# |
| 115 | +temp2 <- loadRforce("./output/") |
| 116 | + |
| 117 | +saveRforce(temp2, "./output/") |
| 118 | +temp3 <- loadRforce("./output/") |
| 119 | + |
| 120 | +predict(temp3, design_matrix_Y[1:2, ]) |
0 commit comments