3
3
#
4
4
# -------------------------- knn -------------------------- #
5
5
# '
6
+ # ' Build a KNN model
7
+ # '
8
+ # ' Builds a K-nearest neighbour model on an H2OFrame.
9
+ # '
6
10
# ' @param x (Optional) A vector containing the names or indices of the predictor variables to use in building the model.
7
11
# ' If x is missing, then all columns except y are used.
8
12
# ' @param y The name or column index of the response variable in the data.
9
13
# ' The response must be either a numeric or a categorical/factor variable.
10
14
# ' If the response is numeric, then a regression model will be trained, otherwise it will train a classification model.
11
15
# ' @param training_frame Id of the training data frame.
12
- # ' @param model_id Destination id for this model; auto-generated if not specified.
13
16
# ' @param id_column Column that identifies each record.
17
+ # ' @param model_id Destination id for this model; auto-generated if not specified.
14
18
# ' @param ignore_const_cols \code{Logical}. Ignore constant columns. Defaults to TRUE.
15
19
# ' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
16
20
# ' Defaults to -1 (time-based random number).
25
29
# ' "WEIGHTED_OVO". Defaults to AUTO.
26
30
# ' @param k Number of nearest neighbours. Defaults to 3.
27
31
# ' @param distance Distance type. Must be one of: "AUTO", "euclidean", "manhattan", "cosine".
32
+ # ' @param verbose \code{Logical}. Print scoring history to the console. Defaults to FALSE.
33
+ # ' @return Creates a \linkS4class{H2OModel} object of the right type.
34
+ # ' @seealso \code{\link{predict.H2OModel}} for prediction
28
35
# ' @export
29
36
h2o.knn <- function (x ,
30
37
y ,
31
38
training_frame ,
39
+ id_column ,
40
+ response_column ,
32
41
model_id = NULL ,
33
- id_column = NULL ,
34
42
ignore_const_cols = TRUE ,
35
43
seed = - 1 ,
36
44
max_runtime_secs = 0 ,
@@ -40,7 +48,8 @@ h2o.knn <- function(x,
40
48
gainslift_bins = - 1 ,
41
49
auc_type = c(" AUTO" , " NONE" , " MACRO_OVR" , " WEIGHTED_OVR" , " MACRO_OVO" , " WEIGHTED_OVO" ),
42
50
k = 3 ,
43
- distance = c(" AUTO" , " euclidean" , " manhattan" , " cosine" ))
51
+ distance = c(" AUTO" , " euclidean" , " manhattan" , " cosine" ),
52
+ verbose = FALSE )
44
53
{
45
54
# Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object
46
55
training_frame <- .validate.H2OFrame(training_frame , required = TRUE )
@@ -59,9 +68,11 @@ h2o.knn <- function(x,
59
68
parms <- list ()
60
69
parms $ training_frame <- training_frame
61
70
args <- .verify_dataxy(training_frame , x , y )
62
- if ( ! missing(offset_column ) && ! is.null(offset_column )) args $ x_ignore <- args $ x_ignore [! ( offset_column == args $ x_ignore )]
63
- if ( ! missing(weights_column ) && ! is.null(weights_column )) args $ x_ignore <- args $ x_ignore [! ( weights_column == args $ x_ignore )]
64
- if ( ! missing(fold_column ) && ! is.null(fold_column )) args $ x_ignore <- args $ x_ignore [! ( fold_column == args $ x_ignore )]
71
+ if (! missing(id_column )) {
72
+ parms $ id_column <- id_column
73
+ } else {
74
+ stop(" ID column is required." )
75
+ }
65
76
parms $ ignored_columns <- args $ x_ignore
66
77
parms $ response_column <- args $ y
67
78
@@ -91,13 +102,14 @@ h2o.knn <- function(x,
91
102
parms $ distance <- distance
92
103
93
104
# Error check and build model
94
- model <- .h2o.modelJob(' knn' , parms , h2oRestApiVersion = 3 , verbose = FALSE )
105
+ model <- .h2o.modelJob(' knn' , parms , h2oRestApiVersion = 3 , verbose = verbose )
95
106
return (model )
96
107
}
97
108
.h2o.train_segments_knn <- function (x ,
98
109
y ,
99
110
training_frame ,
100
- id_column = NULL ,
111
+ id_column ,
112
+ response_column ,
101
113
ignore_const_cols = TRUE ,
102
114
seed = - 1 ,
103
115
max_runtime_secs = 0 ,
@@ -133,9 +145,11 @@ h2o.knn <- function(x,
133
145
parms <- list ()
134
146
parms $ training_frame <- training_frame
135
147
args <- .verify_dataxy(training_frame , x , y )
136
- if ( ! missing(offset_column ) && ! is.null(offset_column )) args $ x_ignore <- args $ x_ignore [! ( offset_column == args $ x_ignore )]
137
- if ( ! missing(weights_column ) && ! is.null(weights_column )) args $ x_ignore <- args $ x_ignore [! ( weights_column == args $ x_ignore )]
138
- if ( ! missing(fold_column ) && ! is.null(fold_column )) args $ x_ignore <- args $ x_ignore [! ( fold_column == args $ x_ignore )]
148
+ if (! missing(id_column )) {
149
+ parms $ id_column <- id_column
150
+ } else {
151
+ stop(" ID column is required." )
152
+ }
139
153
parms $ ignored_columns <- args $ x_ignore
140
154
parms $ response_column <- args $ y
141
155
0 commit comments