Skip to content

Commit 8f29aa4

Browse files
committed
improved tests
1 parent 2a5a221 commit 8f29aa4

7 files changed

Lines changed: 107 additions & 38 deletions

File tree

h2o-algos/src/main/java/hex/glm/GLM.java

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3416,7 +3416,7 @@ private void scoreAndUpdateModel() {
34163416
_model._useRemoveOffsetEffects = false;
34173417
}
34183418
}
3419-
3419+
_model.update(_job._key);
34203420
}
34213421

34223422
private void scorePostProcessingRestrictedModel(Frame train, long t1) {
@@ -3470,7 +3470,6 @@ private void scorePostProcessingRestrictedModel(Frame train, long t1) {
34703470
_job.update(_workPerIteration, _state.toString());
34713471
}
34723472
_model._output._scoring_history = _scoringHistory != null ? _scoringHistory.to2dTable(_parms, null, null) : null;
3473-
_model.update(_job._key);
34743473
}
34753474

34763475
private void scorePostProcessingRestrictedModelCVEnabled(Frame train, long t1) {
@@ -3509,10 +3508,8 @@ private void scorePostProcessingRestrictedModelCVEnabled(Frame train, long t1) {
35093508
objectiveControlVal, _state.deviance(task._likelihood), Double.NaN, mtrain._nobs, 1, _state.lambda(),
35103509
_state.alpha());
35113510

3512-
_job.update(_workPerIteration, _state.toString());
35133511
}
35143512
_model._output._scoring_history_restricted_model_cv = _scoringHistoryControlValEnabled != null ? _scoringHistoryControlValEnabled.to2dTable(_parms, null, null) : null;
3515-
_model.update(_job._key);
35163513
}
35173514
}
35183515

@@ -3548,10 +3545,8 @@ private void scorePostProcessingRestrictedModelROEnabled(Frame train, long t1) {
35483545
_state.objective(), _state.deviance(), Double.NaN, mtrain._nobs, 1, _state.lambda(),
35493546
_state.alpha());
35503547
}
3551-
_job.update(_workPerIteration, _state.toString());
35523548
}
35533549
_model._output._scoring_history_restricted_model_ro = _scoringHistoryRemoveOffsetEnabled != null ? _scoringHistoryRemoveOffsetEnabled.to2dTable(_parms, null, null) : null;
3554-
_model.update(_job._key);
35553550
}
35563551

35573552
private void scorePostProcessing(Frame train, long t1) {
@@ -3629,7 +3624,6 @@ private void scorePostProcessing(Frame train, long t1) {
36293624
_state.alpha());
36303625
}
36313626
}
3632-
_job.update(_workPerIteration, _state.toString());
36333627
}
36343628
if (_parms._lambda_search) {
36353629
_model._output._scoring_history = _lambdaSearchScoringHistory.to2dTable();
@@ -3640,8 +3634,6 @@ private void scorePostProcessing(Frame train, long t1) {
36403634
_model._output._scoring_history = _scoringHistory.to2dTable(_parms, _xval_deviances_generate_SH,
36413635
_xval_sd_generate_SH);
36423636
}
3643-
3644-
_model.update(_job._key);
36453637
_model.generateSummary(_parms._train, _state._iter);
36463638
_lastScore = System.currentTimeMillis();
36473639
long scoringTime = System.currentTimeMillis() - t1;

h2o-bindings/bin/custom/R/gen_glm.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,18 @@ def update_param(name, param):
8080
#' this feature (available only if control_variables and remove_offset_effects parameters are both set)
8181
#' @export
8282
h2o.make_unrestricted_glm_model <- function(model, destination_key = NULL, control_variables_enabled = FALSE, remove_offset_effects_enabled = FALSE) {
83-
stopifnot("GLM wasn't trained with control variables or with remove offset effects." = !is.null(model@params$actual[["control_variables"]]) || isTRUE(model@params$actual[["remove_offset_effects"]]))
84-
query <- list(method = "POST", .h2o.__GLMMakeUnrestrictedModel, model = model@model_id, control_variables_enabled=control_variables_enabled, remove_offset_effects_enabled=remove_offset_effects_enabled)
83+
stopifnot("GLM wasn't trained with control variables or with remove offset effects." =
84+
!is.null(model@params$actual[["control_variables"]]) || isTRUE(model@params$actual[["remove_offset_effects"]]))
85+
if (is.null(model@params$actual[["control_variables"]]) && isFALSE(model@params$actual[["remove_offset_effects"]])
86+
&& (isTRUE(control_variables_enabled) || isTRUE(remove_offset_effects_enabled))) {
87+
stop("GLM wasn't trained with both control variables and with remove offset effects feature set, the control_variables_enabled and remove_offset_effects_enabled features cannot be used.")
88+
}
89+
if ((!is.null(model@params$actual[["control_variables"]]) || isTRUE(model@params$actual[["remove_offset_effects"]]))
90+
&& (isTRUE(control_variables_enabled) && isTRUE(remove_offset_effects_enabled))){
91+
stop("The control_variables_enabled and remove_offset_effects_enabled feature cannot be used together. It produces the same model as the main model.")
92+
}
93+
query <- list(method = "POST", .h2o.__GLMMakeUnrestrictedModel, model = model@model_id,
94+
control_variables_enabled=control_variables_enabled, remove_offset_effects_enabled=remove_offset_effects_enabled)
8595
if (!missing(destination_key) && !is.null(destination_key)) {
8696
query <- c(query, list(dest = destination_key))
8797
}

h2o-bindings/bin/custom/python/gen_glm.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,10 +335,16 @@ def make_unrestricted_glm_model(self, dest=None, control_variables_enabled=False
335335
>>> m2 = m.make_unrestricted_glm_model(dest="unrestricted_glm")
336336
>>> p2 = m2.model_performance(d)
337337
>>> print(p2)
338-
>>> m3 = m.make_unrestricted_glm_model(dest="unrestricted_glm", control_variables_enabled=True)
338+
>>> m3 = m.make_unrestricted_glm_model(dest="unrestricted_glm_cv", control_variables_enabled=True)
339339
>>> p3 = m3.model_performance(d)
340340
>>> print(p3)
341341
"""
342+
if self.actual_params["control_variables"] is None and not(self.actual_params["remove_offset_effects"]):
343+
raise H2OValueError("GLM wasn't trained with control variables or with remove offset effects.")
344+
if (self.actual_params["control_variables"] is None or not(self.actual_params["remove_offset_effects"])) and (control_variables_enabled or remove_offset_effects_enabled):
345+
raise H2OValueError("GLM wasn't trained with both control variables and with remove offset effects feature set, the control_variables_enabled and remove_offset_effects_enabled features cannot be used.")
346+
if self.actual_params["control_variables"] is not None and self.actual_params["remove_offset_effects"] and (control_variables_enabled and remove_offset_effects_enabled):
347+
raise H2OValueError("The control_variables_enabled and remove_offset_effects_enabled feature cannot be used together. It produces the same model as the main model.")
342348
model_json = h2o.api(
343349
"POST /3/MakeUnrestrictedGLMModel",
344350
data={"model": self._model_json["model_id"]["name"],

h2o-py/h2o/estimators/glm.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2848,10 +2848,16 @@ def make_unrestricted_glm_model(self, dest=None, control_variables_enabled=False
28482848
>>> m2 = m.make_unrestricted_glm_model(dest="unrestricted_glm")
28492849
>>> p2 = m2.model_performance(d)
28502850
>>> print(p2)
2851-
>>> m3 = m.make_unrestricted_glm_model(dest="unrestricted_glm", control_variables_enabled=True)
2851+
>>> m3 = m.make_unrestricted_glm_model(dest="unrestricted_glm_cv", control_variables_enabled=True)
28522852
>>> p3 = m3.model_performance(d)
28532853
>>> print(p3)
28542854
"""
2855+
if self.actual_params["control_variables"] is None and not(self.actual_params["remove_offset_effects"]):
2856+
raise H2OValueError("GLM wasn't trained with control variables or with remove offset effects.")
2857+
if (self.actual_params["control_variables"] is None or not(self.actual_params["remove_offset_effects"])) and (control_variables_enabled or remove_offset_effects_enabled):
2858+
raise H2OValueError("GLM wasn't trained with both control variables and with remove offset effects feature set, the control_variables_enabled and remove_offset_effects_enabled features cannot be used.")
2859+
if self.actual_params["control_variables"] is not None and self.actual_params["remove_offset_effects"] and (control_variables_enabled and remove_offset_effects_enabled):
2860+
raise H2OValueError("The control_variables_enabled and remove_offset_effects_enabled feature cannot be used together. It produces the same model as the main model.")
28552861
model_json = h2o.api(
28562862
"POST /3/MakeUnrestrictedGLMModel",
28572863
data={"model": self._model_json["model_id"]["name"],

h2o-py/tests/testdir_algos/glm/pyunit_glm_make_unrestricted_model.py

Lines changed: 53 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,32 @@
22

33
from h2o.exceptions import H2OResponseError
44

5-
sys.path.insert(1,"../../../")
5+
sys.path.insert(1, "../../../")
66
import h2o
77
from tests import pyunit_utils
88
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
99

1010

1111
def glm_unrestricted_model():
12-
1312
cars = h2o.upload_file(pyunit_utils.locate("smalldata/junit/cars_20mpg.csv"))
1413
cars = cars[cars["economy_20mpg"].isna() == 0]
1514
cars["name"] = cars["name"].asfactor()
1615
cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
1716
offset_col = "offset"
18-
offset = h2o.H2OFrame([[.5]]*398)
17+
offset = h2o.H2OFrame([[.5]] * 398)
1918
offset.set_names([offset_col])
2019
cars = cars.cbind(offset)
2120

2221
print("-- Model without control variables and remove offset effects --")
23-
glm_model = H2OGeneralizedLinearEstimator(family="binomial", score_each_iteration=True,
22+
glm_model = H2OGeneralizedLinearEstimator(family="binomial", score_each_iteration=True,
2423
generate_scoring_history=True, seed=0xC0FFEE)
2524
glm_model.train(x=["name", "power", "year"], y="economy_20mpg", training_frame=cars, offset_column=offset_col)
2625
metrics = glm_model.training_model_metrics()
2726
print(metrics)
2827
print(glm_model)
2928

3029
print("-- Model with control variables --")
31-
glm_model_cv = H2OGeneralizedLinearEstimator(family="binomial", control_variables=["year"],
30+
glm_model_cv = H2OGeneralizedLinearEstimator(family="binomial", control_variables=["year"],
3231
score_each_iteration=True, generate_scoring_history=True,
3332
seed=0xC0FFEE)
3433
glm_model_cv.train(x=["name", "power", "year"], y="economy_20mpg", training_frame=cars, offset_column=offset_col)
@@ -43,7 +42,7 @@ def glm_unrestricted_model():
4342
print(metrics_unrestricted_cv)
4443

4544
print("-- Model with remove offset effects --")
46-
glm_model_ro = H2OGeneralizedLinearEstimator(family="binomial", remove_offset_effects=True,
45+
glm_model_ro = H2OGeneralizedLinearEstimator(family="binomial", remove_offset_effects=True,
4746
generate_scoring_history=True,
4847
score_each_iteration=True, seed=0xC0FFEE)
4948
glm_model_ro.train(x=["name", "power", "year"], y="economy_20mpg", training_frame=cars, offset_column=offset_col)
@@ -58,7 +57,7 @@ def glm_unrestricted_model():
5857
print(metrics_unrestricted_ro)
5958

6059
print("-- Model with control variables and remove offset effects --")
61-
glm_model_cv_ro = H2OGeneralizedLinearEstimator(family="binomial", control_variables=["year"],
60+
glm_model_cv_ro = H2OGeneralizedLinearEstimator(family="binomial", control_variables=["year"],
6261
remove_offset_effects=True, generate_scoring_history=True,
6362
score_each_iteration=True, seed=0xC0FFEE)
6463
glm_model_cv_ro.train(x=["name", "power", "year"], y="economy_20mpg", training_frame=cars, offset_column=offset_col)
@@ -73,7 +72,7 @@ def glm_unrestricted_model():
7372
print(metrics_unrestricted_cv_ro)
7473

7574
print("-- Unrestricted model with control variables enabled and remove offset effects disabled --")
76-
glm_model_unrestricted_cv_true_ro_false = glm_model_cv_ro.make_unrestricted_glm_model(dest="cv_true",
75+
glm_model_unrestricted_cv_true_ro_false = glm_model_cv_ro.make_unrestricted_glm_model(dest="cv_true",
7776
control_variables_enabled=True)
7877
print(glm_model_unrestricted_cv_ro)
7978
metrics_unrestricted_cv_true_ro_false = glm_model_unrestricted_cv_true_ro_false.training_model_metrics()
@@ -107,36 +106,67 @@ def glm_unrestricted_model():
107106

108107
# check the coefficients
109108
for k in glm_model.coef().keys():
110-
pyunit_utils.assert_equals(glm_model.coef()[k], glm_model_unrestricted_cv.coef().get(k, float("NaN")), f"Coefficient {k} differs!")
109+
pyunit_utils.assert_equals(glm_model.coef()[k], glm_model_unrestricted_cv.coef().get(k, float("NaN")),
110+
f"Coefficient {k} differs!")
111111

112112
# check predictions are the same
113113
for i in range(predictions.shape[0]):
114-
pyunit_utils.assert_equals(predictions.iloc[i, 1], predictions_unrestricted_cv.iloc[i, 1], f"{i}th prediction differs!")
115-
pyunit_utils.assert_equals(predictions.iloc[i, 1], predictions_unrestricted_ro.iloc[i, 1], f"{i}th prediction differs!")
116-
pyunit_utils.assert_equals(predictions.iloc[i, 1], predictions_unrestricted_cv_ro.iloc[i, 1], f"{i}th prediction differs!")
117-
pyunit_utils.assert_equals(predictions_cv.iloc[i, 1], predictions_unrestricted_cv_true_ro_false.iloc[i, 1], f"{i}th prediction differs!")
118-
pyunit_utils.assert_equals(predictions_ro.iloc[i, 1], predictions_unrestricted_cv_false_ro_true.iloc[i, 1], f"{i}th prediction differs!")
119-
114+
pyunit_utils.assert_equals(predictions.iloc[i, 1], predictions_unrestricted_cv.iloc[i, 1],
115+
f"{i}th prediction differs!")
116+
pyunit_utils.assert_equals(predictions.iloc[i, 1], predictions_unrestricted_ro.iloc[i, 1],
117+
f"{i}th prediction differs!")
118+
pyunit_utils.assert_equals(predictions.iloc[i, 1], predictions_unrestricted_cv_ro.iloc[i, 1],
119+
f"{i}th prediction differs!")
120+
pyunit_utils.assert_equals(predictions_cv.iloc[i, 1], predictions_unrestricted_cv_true_ro_false.iloc[i, 1],
121+
f"{i}th prediction differs!")
122+
pyunit_utils.assert_equals(predictions_ro.iloc[i, 1], predictions_unrestricted_cv_false_ro_true.iloc[i, 1],
123+
f"{i}th prediction differs!")
124+
120125
# check predictions differ
121126
for i in range(predictions.shape[0]):
122-
pyunit_utils.assert_not_equal(predictions.iloc[i, 1], predictions_cv.iloc[i, 1], f"Predictions at position {i} should differ but they don't!")
123-
pyunit_utils.assert_not_equal(predictions.iloc[i, 1], predictions_ro.iloc[i, 1], f"Predictions at position {i} should differ but they don't!")
124-
pyunit_utils.assert_not_equal(predictions.iloc[i, 1], predictions_cv_ro.iloc[i, 1], f"Predictions at position {i} should differ but they don't!")
125-
pyunit_utils.assert_not_equal(predictions_unrestricted_cv_false_ro_true.iloc[i, 1], predictions_unrestricted_cv_true_ro_false.iloc[i, 1], f"Predictions at position {i} should differ but they don't!")
126-
127+
pyunit_utils.assert_not_equal(predictions.iloc[i, 1], predictions_cv.iloc[i, 1],
128+
f"Predictions at position {i} should differ but they don't!")
129+
pyunit_utils.assert_not_equal(predictions.iloc[i, 1], predictions_ro.iloc[i, 1],
130+
f"Predictions at position {i} should differ but they don't!")
131+
pyunit_utils.assert_not_equal(predictions.iloc[i, 1], predictions_cv_ro.iloc[i, 1],
132+
f"Predictions at position {i} should differ but they don't!")
133+
pyunit_utils.assert_not_equal(predictions_unrestricted_cv_false_ro_true.iloc[i, 1],
134+
predictions_unrestricted_cv_true_ro_false.iloc[i, 1],
135+
f"Predictions at position {i} should differ but they don't!")
136+
127137
print(glm_model_cv.scoring_history())
128138
print(glm_model_unrestricted_cv_true_ro_false.scoring_history())
129-
139+
130140
# check that the scoring histories are the same
131-
pyunit_utils.assert_equal_scoring_history(glm_model, glm_model_unrestricted_cv,
141+
pyunit_utils.assert_equal_scoring_history(glm_model, glm_model_unrestricted_cv,
132142
["objective", "negative_log_likelihood"])
133-
pyunit_utils.assert_equal_scoring_history(glm_model_cv, glm_model_unrestricted_cv_true_ro_false,
143+
pyunit_utils.assert_equal_scoring_history(glm_model_cv, glm_model_unrestricted_cv_true_ro_false,
134144
["objective", "negative_log_likelihood", "deviance_train", "lambda"])
135145
pyunit_utils.assert_equal_scoring_history(glm_model_ro, glm_model_unrestricted_cv_false_ro_true,
136146
["objective", "negative_log_likelihood", "deviance_train", "lambda"])
137147
pyunit_utils.assert_equal_scoring_history(glm_model_unrestricted_cv, glm_model_unrestricted_cv_ro,
138148
["objective", "negative_log_likelihood", "deviance_train", "lambda"])
139149

150+
# should fail
151+
try:
152+
glm_model_ro.make_unrestricted_glm_model(dest="ro_true", remove_offset_effects_enabled=True)
153+
assert False, "Should have throw exception."
154+
except Exception as ex:
155+
print(ex)
156+
temp = str(ex)
157+
assert "GLM wasn't trained with both control variables and with remove offset effects feature set, the control_variables_enabled and remove_offset_effects_enabled features cannot be used." in temp, \
158+
"Wrong exception was received."
159+
160+
try:
161+
glm_model_cv_ro.make_unrestricted_glm_model(dest="ro_true", remove_offset_effects_enabled=True,
162+
control_variables_enabled=True)
163+
assert False, "Should have throw exception."
164+
except Exception as ex:
165+
print(ex)
166+
temp = str(ex)
167+
assert "The control_variables_enabled and remove_offset_effects_enabled feature cannot be used together. It produces the same model as the main model." in temp, \
168+
"Wrong exception was received."
169+
140170

141171
if __name__ == "__main__":
142172
pyunit_utils.standalone_test(glm_unrestricted_model)

h2o-r/h2o-package/R/glm.R

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -843,8 +843,18 @@ h2o.makeGLMModel <- function(model,beta) {
843843
#' this feature (available only if control_variables and remove_offset_effects parameters are both set)
844844
#' @export
845845
h2o.make_unrestricted_glm_model <- function(model, destination_key = NULL, control_variables_enabled = FALSE, remove_offset_effects_enabled = FALSE) {
846-
stopifnot("GLM wasn't trained with control variables or with remove offset effects." = !is.null(model@params$actual[["control_variables"]]) || isTRUE(model@params$actual[["remove_offset_effects"]]))
847-
query <- list(method = "POST", .h2o.__GLMMakeUnrestrictedModel, model = model@model_id, control_variables_enabled=control_variables_enabled, remove_offset_effects_enabled=remove_offset_effects_enabled)
846+
stopifnot("GLM wasn't trained with control variables or with remove offset effects." =
847+
!is.null(model@params$actual[["control_variables"]]) || isTRUE(model@params$actual[["remove_offset_effects"]]))
848+
if (is.null(model@params$actual[["control_variables"]]) && isFALSE(model@params$actual[["remove_offset_effects"]])
849+
&& (isTRUE(control_variables_enabled) || isTRUE(remove_offset_effects_enabled))) {
850+
stop("GLM wasn't trained with both control variables and with remove offset effects feature set, the control_variables_enabled and remove_offset_effects_enabled features cannot be used.")
851+
}
852+
if ((!is.null(model@params$actual[["control_variables"]]) || isTRUE(model@params$actual[["remove_offset_effects"]]))
853+
&& (isTRUE(control_variables_enabled) && isTRUE(remove_offset_effects_enabled))){
854+
stop("The control_variables_enabled and remove_offset_effects_enabled feature cannot be used together. It produces the same model as the main model.")
855+
}
856+
query <- list(method = "POST", .h2o.__GLMMakeUnrestrictedModel, model = model@model_id,
857+
control_variables_enabled=control_variables_enabled, remove_offset_effects_enabled=remove_offset_effects_enabled)
848858
if (!missing(destination_key) && !is.null(destination_key)) {
849859
query <- c(query, list(dest = destination_key))
850860
}

0 commit comments

Comments
 (0)