Skip to content

Commit 633d62e

Browse files
Copilotmaurever
andcommitted
Add input validation for H statistic and tests
Co-authored-by: maurever <11465784+maurever@users.noreply.github.com>
1 parent c9c1711 commit 633d62e

3 files changed

Lines changed: 143 additions & 0 deletions

File tree

h2o-algos/src/main/java/hex/tree/gbm/GBMModel.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,35 @@ public TwoDimTable[][] getFeatureInteractionsTable(int maxInteractionDepth, int
315315

316316
@Override
317317
public double getFriedmanPopescusH(Frame frame, String[] vars) {
318+
// Validate vars parameter
319+
if (vars == null || vars.length == 0) {
320+
throw new IllegalArgumentException(
321+
"Calculating H statistics error: 'vars' parameter cannot be null or empty. " +
322+
"Please specify at least one variable.");
323+
}
324+
325+
// Validate that all specified columns exist in the frame and are numeric
326+
for (String varName : vars) {
327+
if (varName == null) {
328+
throw new IllegalArgumentException(
329+
"Calculating H statistics error: 'vars' parameter contains a null value.");
330+
}
331+
Vec col = frame.vec(varName);
332+
if (col == null) {
333+
throw new IllegalArgumentException(
334+
"Calculating H statistics error: column '" + varName + "' does not exist in the frame.");
335+
}
336+
if (!col.isNumeric()) {
337+
throw new IllegalArgumentException(
338+
"Calculating H statistics error: column '" + varName + "' is not numeric. " +
339+
"H statistics can only be calculated for numeric variables.");
340+
}
341+
if (col.isBad() || col.isConst()) {
342+
throw new IllegalArgumentException(
343+
"Calculating H statistics error: column '" + varName + "' contains only missing or constant values.");
344+
}
345+
}
346+
318347
Frame adaptFrm = removeSpecialNNonNumericColumns(frame);
319348

320349
for(int colId = 0; colId < adaptFrm.numCols(); colId++) {

h2o-algos/src/test/java/hex/tree/gbm/GBMTest.java

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5272,5 +5272,90 @@ private void printNode(SharedTreeNode n, int depth, StringBuilder sb) {
52725272
printNode(n.getLeftChild(), depth + 1, sb);
52735273
printNode(n.getRightChild(), depth + 1, sb);
52745274
}
5275+
5276+
@Test
5277+
public void testFriedmanPopescusHInputValidation() {
5278+
GBMModel gbm = null;
5279+
Frame fr = null;
5280+
try {
5281+
Scope.enter();
5282+
fr = parseTestFile("./smalldata/logreg/prostate.csv");
5283+
fr.remove("ID").remove();
5284+
Scope.track(fr);
5285+
5286+
GBMModel.GBMParameters parms = makeGBMParameters();
5287+
parms._train = fr._key;
5288+
parms._response_column = "CAPSULE";
5289+
parms._seed = 1234;
5290+
parms._ntrees = 5;
5291+
parms._max_depth = 5;
5292+
5293+
gbm = new GBM(parms).trainModel().get();
5294+
Scope.track_generic(gbm);
5295+
5296+
// Test 1: null vars parameter
5297+
try {
5298+
gbm.getFriedmanPopescusH(fr, null);
5299+
fail("Expected IllegalArgumentException for null vars parameter");
5300+
} catch (IllegalArgumentException e) {
5301+
assertTrue("Error message should mention 'vars' parameter",
5302+
e.getMessage().contains("'vars' parameter"));
5303+
assertTrue("Error message should mention null",
5304+
e.getMessage().contains("null"));
5305+
}
5306+
5307+
// Test 2: empty vars parameter
5308+
try {
5309+
gbm.getFriedmanPopescusH(fr, new String[]{});
5310+
fail("Expected IllegalArgumentException for empty vars parameter");
5311+
} catch (IllegalArgumentException e) {
5312+
assertTrue("Error message should mention 'vars' parameter",
5313+
e.getMessage().contains("'vars' parameter"));
5314+
assertTrue("Error message should mention empty",
5315+
e.getMessage().contains("empty"));
5316+
}
5317+
5318+
// Test 3: vars parameter with null element
5319+
try {
5320+
gbm.getFriedmanPopescusH(fr, new String[]{"AGE", null});
5321+
fail("Expected IllegalArgumentException for null element in vars");
5322+
} catch (IllegalArgumentException e) {
5323+
assertTrue("Error message should mention null value",
5324+
e.getMessage().contains("null value"));
5325+
}
5326+
5327+
// Test 4: non-existent column
5328+
try {
5329+
gbm.getFriedmanPopescusH(fr, new String[]{"AGE", "NONEXISTENT"});
5330+
fail("Expected IllegalArgumentException for non-existent column");
5331+
} catch (IllegalArgumentException e) {
5332+
assertTrue("Error message should mention the column name",
5333+
e.getMessage().contains("NONEXISTENT"));
5334+
assertTrue("Error message should mention 'does not exist'",
5335+
e.getMessage().contains("does not exist"));
5336+
}
5337+
5338+
// Test 5: non-numeric column (GLEASON is categorical in prostate dataset)
5339+
try {
5340+
gbm.getFriedmanPopescusH(fr, new String[]{"AGE", "GLEASON"});
5341+
fail("Expected IllegalArgumentException for non-numeric column");
5342+
} catch (IllegalArgumentException e) {
5343+
assertTrue("Error message should mention column is not numeric",
5344+
e.getMessage().contains("not numeric"));
5345+
}
5346+
5347+
// Test 6: Valid case - should not throw exception
5348+
try {
5349+
double h = gbm.getFriedmanPopescusH(fr, new String[]{"AGE", "PSA"});
5350+
assertTrue("H statistic should be non-negative", h >= 0.0);
5351+
} catch (Exception e) {
5352+
fail("Valid input should not throw exception: " + e.getMessage());
5353+
}
5354+
5355+
} finally {
5356+
if (gbm != null) gbm.delete();
5357+
Scope.exit();
5358+
}
5359+
}
52755360

52765361
}

h2o-extensions/xgboost/src/main/java/hex/tree/xgboost/XGBoostModel.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,35 @@ protected Frame removeSpecialNNonNumericColumns(Frame frame) {
10361036

10371037
@Override
10381038
public double getFriedmanPopescusH(Frame frame, String[] vars) {
1039+
// Validate vars parameter
1040+
if (vars == null || vars.length == 0) {
1041+
throw new IllegalArgumentException(
1042+
"Calculating H statistics error: 'vars' parameter cannot be null or empty. " +
1043+
"Please specify at least one variable.");
1044+
}
1045+
1046+
// Validate that all specified columns exist in the frame and are numeric
1047+
for (String varName : vars) {
1048+
if (varName == null) {
1049+
throw new IllegalArgumentException(
1050+
"Calculating H statistics error: 'vars' parameter contains a null value.");
1051+
}
1052+
Vec col = frame.vec(varName);
1053+
if (col == null) {
1054+
throw new IllegalArgumentException(
1055+
"Calculating H statistics error: column '" + varName + "' does not exist in the frame.");
1056+
}
1057+
if (!col.isNumeric()) {
1058+
throw new IllegalArgumentException(
1059+
"Calculating H statistics error: column '" + varName + "' is not numeric. " +
1060+
"H statistics can only be calculated for numeric variables.");
1061+
}
1062+
if (col.isBad() || col.isConst()) {
1063+
throw new IllegalArgumentException(
1064+
"Calculating H statistics error: column '" + varName + "' contains only missing or constant values.");
1065+
}
1066+
}
1067+
10391068
Frame adaptFrm = removeSpecialNNonNumericColumns(frame);
10401069

10411070
for(int colId = 0; colId < adaptFrm.numCols(); colId++) {

0 commit comments

Comments
 (0)