Skip to content

Commit ef16c84

Browse files
committed
Fix failed tests
1 parent dd5104e commit ef16c84

File tree

9 files changed

+75
-25
lines changed

9 files changed

+75
-25
lines changed

h2o-algos/src/main/java/hex/knn/KNN.java

+6-7
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ class KNNDriver extends Driver {
4949
public void computeImpl() {
5050
KNNModel model = null;
5151
Frame result = new Frame(Key.make("KNN_distances"));
52-
Frame tmpResult = null;
5352
try {
5453
init(true); // Initialize parameters
5554
if (error_count() > 0) {
@@ -72,12 +71,15 @@ public void computeImpl() {
7271
query[j] = train.vec(j).chunkForChunkIdx(i).deepCopy();
7372
}
7473
KNNDistanceTask task = new KNNDistanceTask(_parms._k, query, KNNDistanceFactory.createDistance(_parms._distance), idColumnIndex, idColumn, idType, responseColumnIndex, responseColumn);
75-
tmpResult = task.doAll(train).outputFrame();
74+
Frame tmpResult = task.doAll(train).outputFrame();
75+
Scope.untrack(tmpResult);
76+
7677
// merge result from a chunk
7778
result = result.add(tmpResult);
7879
}
79-
DKV.put(result._key, result);
80-
model._output.setDistancesKey(result._key);
80+
Key<Frame> key = result._key;
81+
DKV.put(key, result);
82+
model._output.setDistancesKey(key);
8183
Scope.untrack(result);
8284

8385
model.update(_job);
@@ -90,9 +92,6 @@ public void computeImpl() {
9092
if (model != null) {
9193
model.unlock(_job);
9294
}
93-
if (tmpResult != null) {
94-
tmpResult.remove();
95-
}
9695
}
9796
}
9897
}

h2o-algos/src/test/java/hex/knn/KNNTest.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,7 @@ public void testIris() {
5555
ModelMetricsMultinomial mm1 = (ModelMetricsMultinomial) knn._output._training_metrics;
5656
Assert.assertEquals(mm.auc(), mm1.auc(), 0);
5757

58-
// test after KNN API will be ready
59-
//knn.testJavaScoring(fr, preds, 0);
60-
58+
knn.testJavaScoring(fr, preds, 0);
6159
} finally {
6260
if (knn != null){
6361
knn.delete();

h2o-bindings/bin/custom/R/gen_knn.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Customisation hooks for the generated R wrapper of the KNN algorithm
# (h2o-r/h2o-package/R/knn.R).  Two module-level dicts are defined here:
# `extensions` (signature / parameter-handling overrides) and `doc`
# (roxygen documentation fragments).
extensions = dict(
    # Adds a `verbose` argument to the wrapper.
    # NOTE(review): presumably (name, default-as-R-literal) pairs - confirm
    # against the bindings generator.
    extra_params=[('verbose', 'FALSE')],
    # NOTE(review): 'response_column' is listed here next to 'y', while the
    # template below fills parms$response_column from args$y - confirm the
    # extra required argument is intended.
    required_params=['x', 'y', 'training_frame', 'id_column', 'response_column'],
    skip_default_set_params_for=['training_frame', 'ignored_columns', 'response_column', 'offset_column'],
    # R snippet that fills `parms` for the required arguments; it aborts with
    # an error when `id_column` is not supplied.
    set_required_params="""
parms$training_frame <- training_frame
args <- .verify_dataxy(training_frame, x, y)
if (!missing(id_column)) {
  parms$id_column <- id_column
} else {
  stop("ID column is required.")
}
parms$ignored_columns <- args$x_ignore
parms$response_column <- args$y
""",
)


# Roxygen documentation fragments.  The strings holding Rd markup are raw
# strings: `\c` and `\l` are not valid Python escape sequences, and a plain
# literal would trigger an invalid-escape warning while producing the very
# same text.
doc = dict(
    preamble="""
Build a KNN model

Builds a K-nearest neighbour model on an H2OFrame.
""",
    params=dict(
        verbose=r"""
\code{Logical}. Print scoring history to the console. Defaults to FALSE.
""",
    ),
    returns=r"""
Creates a \linkS4class{H2OModel} object of the right type.
""",
    seealso=r"""
\code{\link{predict.H2OModel}} for prediction
""",
    # No examples yet.
    examples="",
)

h2o-py/tests/testdir_sklearn/pyunit_sklearn_classification_all_estimators.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def make_tests(classifier):
149149

150150

151151
failing = [
152-
'H2OStackedEnsembleClassifier', 'H2OUpliftRandomForestClassifier' # needs a separate test (requires models as parameters)
152+
'H2OStackedEnsembleClassifier', 'H2OUpliftRandomForestClassifier', 'H2OKnnClassifier' # needs a separate test (requires models as parameters)
153153
]
154154
classifiers = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
155155
if name.endswith('Classifier') and name not in ['H2OAutoMLClassifier']+failing]

h2o-py/tests/testdir_sklearn/pyunit_sklearn_generic_all_estimators.py

+1
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ def make_tests(classifier):
200200
'H2OUpliftRandomForestEstimator', # generic part is not implemented yet
201201
'H2ODecisionTreeEstimator', # generic part is not implemented yet
202202
'H2OAdaBoostEstimator', # generic part is not implemented yet or test needs to be adjusted just for classification
203+
'H2OKnnEstimator' # generic part is not implemented yet
203204
]
204205
estimators = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
205206
if name.endswith('Estimator') and name not in ['H2OAutoMLEstimator'] + failing]

h2o-py/tests/testdir_sklearn/pyunit_sklearn_regression_all_estimators.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ def make_tests(classifier):
139139
'H2OStackedEnsembleRegressor', # needs a separate test (requires models as parameters),
140140
'H2OUpliftRandomForestRegressor', # does not support regression yet
141141
'H2ODecisionTreeRegressor', # does not support regression yet
142-
'H2OAdaBoostRegressor' # does not support regression yet
142+
'H2OAdaBoostRegressor', # does not support regression yet
143+
'H2OKnnRegressor' # does not support regression
143144
]
144145
regressors = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
145146
if name.endswith('Regressor') and name not in ['H2OAutoMLRegressor']+failing]

h2o-py/tests_rest_smoke/testdir_multi_jvm/test_rest_api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
algos = ['coxph', 'kmeans', 'deeplearning', 'drf', 'glm', 'gbm', 'pca', 'naivebayes', 'glrm', 'svd', 'isotonicregression',
2424
'psvm', 'aggregator', 'word2vec', 'stackedensemble', 'xgboost', 'isolationforest', 'gam',
2525
'generic', 'targetencoder', 'rulefit', 'extendedisolationforest', 'anovaglm', 'modelselection',
26-
'upliftdrf', 'infogram', 'dt', 'adaboost', 'hglm']
26+
'upliftdrf', 'infogram', 'dt', 'adaboost', 'hglm', 'knn']
2727

2828
algo_additional_default_params = { 'grep' : { 'regex' : '.*' },
2929
'kmeans' : { 'k' : 2 },

h2o-r/h2o-package/R/knn.R

+25-11
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,18 @@
33
#
44
# -------------------------- knn -------------------------- #
55
#'
6+
#' Build a KNN model
7+
#'
8+
#' Builds a K-nearest neighbour model on an H2OFrame.
9+
#'
610
#' @param x (Optional) A vector containing the names or indices of the predictor variables to use in building the model.
711
#' If x is missing, then all columns except y are used.
812
#' @param y The name or column index of the response variable in the data.
913
#' The response must be either a numeric or a categorical/factor variable.
1014
#' If the response is numeric, then a regression model will be trained, otherwise it will train a classification model.
1115
#' @param training_frame Id of the training data frame.
12-
#' @param model_id Destination id for this model; auto-generated if not specified.
1316
#' @param id_column Column that identifies each record.
17+
#' @param model_id Destination id for this model; auto-generated if not specified.
1418
#' @param ignore_const_cols \code{Logical}. Ignore constant columns. Defaults to TRUE.
1519
#' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
1620
#' Defaults to -1 (time-based random number).
@@ -25,12 +29,16 @@
2529
#' "WEIGHTED_OVO". Defaults to AUTO.
2630
#' @param k Number of nearest neighbours. Defaults to 3.
2731
#' @param distance Distance type. Must be one of: "AUTO", "euclidean", "manhattan", "cosine".
32+
#' @param verbose \code{Logical}. Print scoring history to the console. Defaults to FALSE.
33+
#' @return Creates a \linkS4class{H2OModel} object of the right type.
34+
#' @seealso \code{\link{predict.H2OModel}} for prediction
2835
#' @export
2936
h2o.knn <- function(x,
3037
y,
3138
training_frame,
39+
id_column,
40+
response_column,
3241
model_id = NULL,
33-
id_column = NULL,
3442
ignore_const_cols = TRUE,
3543
seed = -1,
3644
max_runtime_secs = 0,
@@ -40,7 +48,8 @@ h2o.knn <- function(x,
4048
gainslift_bins = -1,
4149
auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"),
4250
k = 3,
43-
distance = c("AUTO", "euclidean", "manhattan", "cosine"))
51+
distance = c("AUTO", "euclidean", "manhattan", "cosine"),
52+
verbose = FALSE)
4453
{
4554
# Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object
4655
training_frame <- .validate.H2OFrame(training_frame, required=TRUE)
@@ -59,9 +68,11 @@ h2o.knn <- function(x,
5968
parms <- list()
6069
parms$training_frame <- training_frame
6170
args <- .verify_dataxy(training_frame, x, y)
62-
if( !missing(offset_column) && !is.null(offset_column)) args$x_ignore <- args$x_ignore[!( offset_column == args$x_ignore )]
63-
if( !missing(weights_column) && !is.null(weights_column)) args$x_ignore <- args$x_ignore[!( weights_column == args$x_ignore )]
64-
if( !missing(fold_column) && !is.null(fold_column)) args$x_ignore <- args$x_ignore[!( fold_column == args$x_ignore )]
71+
if (!missing(id_column)) {
72+
parms$id_column <- id_column
73+
} else {
74+
stop("ID column is required.")
75+
}
6576
parms$ignored_columns <- args$x_ignore
6677
parms$response_column <- args$y
6778

@@ -91,13 +102,14 @@ h2o.knn <- function(x,
91102
parms$distance <- distance
92103

93104
# Error check and build model
94-
model <- .h2o.modelJob('knn', parms, h2oRestApiVersion=3, verbose=FALSE)
105+
model <- .h2o.modelJob('knn', parms, h2oRestApiVersion=3, verbose=verbose)
95106
return(model)
96107
}
97108
.h2o.train_segments_knn <- function(x,
98109
y,
99110
training_frame,
100-
id_column = NULL,
111+
id_column,
112+
response_column,
101113
ignore_const_cols = TRUE,
102114
seed = -1,
103115
max_runtime_secs = 0,
@@ -133,9 +145,11 @@ h2o.knn <- function(x,
133145
parms <- list()
134146
parms$training_frame <- training_frame
135147
args <- .verify_dataxy(training_frame, x, y)
136-
if( !missing(offset_column) && !is.null(offset_column)) args$x_ignore <- args$x_ignore[!( offset_column == args$x_ignore )]
137-
if( !missing(weights_column) && !is.null(weights_column)) args$x_ignore <- args$x_ignore[!( weights_column == args$x_ignore )]
138-
if( !missing(fold_column) && !is.null(fold_column)) args$x_ignore <- args$x_ignore[!( fold_column == args$x_ignore )]
148+
if (!missing(id_column)) {
149+
parms$id_column <- id_column
150+
} else {
151+
stop("ID column is required.")
152+
}
139153
parms$ignored_columns <- args$x_ignore
140154
parms$response_column <- args$y
141155

h2o-r/tests/testdir_algos/knn/runit_knn_smoke.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ source("../../../scripts/h2o-r-test-setup.R")
55

66
knn.smoke <- function() {
77
iris.hex <- h2o.uploadFile( locate("smalldata/iris/iris.csv"))
8-
iris.knn <- h2o.knn(x=1:4, training_frame=iris.hex, k = 3, distance="euclidean", seed = 1234)
8+
iris.knn <- h2o.knn(x=1:4, y=5, training_frame=iris.hex, k=3 , distance="euclidean", seed=1234)
99

1010
# Score test data with different default auc_type (previous was "NONE", so no AUC calculation)
1111
perf <- h2o.performance(iris.knn, test.hex, auc_type="WEIGHTED_OVO")

0 commit comments

Comments
 (0)