.. h2o-docs/src/product/automl.rst

H2O AutoML: Automatic machine learning
======================================

In recent years, the demand for machine learning experts has outpaced supply, despite a surge of people entering the field. To address this gap, significant progress has been made in developing user-friendly machine learning software that non-experts can use. The initial steps toward simplifying machine learning involved creating simple, unified interfaces for a variety of machine learning algorithms, such as H2O.

Although H2O has made it easier for non-experts to experiment with machine learning, a fair bit of knowledge and background in data science is still required to produce high-performing models. Deep neural networks, in particular, are notoriously difficult for a non-expert to tune properly. To make machine learning software truly accessible to non-experts, we have designed an easy-to-use interface that automates the process of training a large selection of candidate models. H2O's AutoML is also a helpful tool for advanced users: it provides a simple wrapper function that performs many modeling-related tasks that would typically require many lines of code, freeing up time to focus on other data science tasks such as data preprocessing, feature engineering, and model deployment.

H2O's AutoML can be used to automate the machine learning workflow, which includes automatic training and tuning of many models within a user-specified time limit.

H2O offers a number of `model explainability <http://docs.h2o.ai/h2o/latest-stable/h2o-docs/explain.html>`__ methods that apply to AutoML objects (groups of models), as well as to individual models (e.g. the leader model). Explanations can be generated automatically with a single function call, providing a simple interface for exploring and explaining the AutoML models.

AutoML interface
----------------

The H2O AutoML interface is designed to have as few parameters as possible so that all the user needs to do is point to their dataset, identify the response column, and optionally specify a time constraint or limit on the number of total models trained. Below are the parameters that can be set by the user in the R and Python interfaces. See the `Web UI via H2O Wave <#web-ui-via-h2o-wave>`__ section below for information on how to use the H2O Wave web interface for AutoML.

In both the R and Python APIs, AutoML uses the same data-related arguments (``x``, ``y``, ``training_frame``, ``validation_frame``) as the other H2O algorithms. Most of the time, all you'll need to do is specify the data arguments. You can then configure values for ``max_runtime_secs`` and/or ``max_models`` to set explicit time or number-of-model limits on your run.

Required parameters
~~~~~~~~~~~~~~~~~~~

Required data parameters
''''''''''''''''''''''''

- `y <data-science/algo-params/y.html>`__: This argument is the name (or index) of the response column.
- `training_frame <data-science/algo-params/training_frame.html>`__: Specifies the training set.
Required stopping parameters
''''''''''''''''''''''''''''

One of the following stopping strategies (time or number-of-model based) must be specified. When both options are set, the AutoML run will stop as soon as it reaches either of these limits.

- `max_runtime_secs <data-science/algo-params/max_runtime_secs.html>`__: This argument specifies the maximum time that the AutoML process will run for. The default is 0 (no limit); however, if neither ``max_runtime_secs`` nor ``max_models`` is specified by the user, this value dynamically defaults to 1 hour.

- `max_models <data-science/algo-params/max_models.html>`__: Specify the maximum number of models to build in an AutoML run, excluding the Stacked Ensemble models. Defaults to ``NULL/None``. Always set this parameter to ensure AutoML reproducibility: all models are then trained until convergence and none is constrained by a time budget.

Optional parameters
~~~~~~~~~~~~~~~~~~~

Optional data parameters
''''''''''''''''''''''''

- `x <data-science/algo-params/x.html>`__: A list/vector of predictor column names or indexes. This argument only needs to be specified if the user wants to exclude columns from the set of predictors. If all columns (other than the response) should be used in prediction, then this does not need to be set.
- `weights_column <data-science/algo-params/weights_column.html>`__: Specifies a column with observation weights. Giving an observation a weight of zero is equivalent to excluding it from the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative weights are not allowed.

Optional miscellaneous parameters
'''''''''''''''''''''''''''''''''

- `nfolds <data-science/algo-params/nfolds.html>`__: Specify a value >= 2 for the number of folds for k-fold cross-validation of the models in the AutoML run, or specify "-1" to let AutoML choose whether k-fold cross-validation or blending mode should be used. Blending mode will use part of ``training_frame`` (if no ``blending_frame`` is provided) to train Stacked Ensembles. Use 0 to disable cross-validation; this will also disable Stacked Ensembles (thus decreasing the overall best model performance). This value defaults to "-1".

If the user turns off cross-validation by setting ``nfolds == 0``, then cross-validation metrics will not be available to populate the leaderboard. In this case, we need to make sure there is a holdout frame (i.e. the "leaderboard frame") to score the models on so that we can generate model performance metrics for the leaderboard. Without cross-validation, we will also require a validation frame to be used for early stopping on the models. Therefore, if either of these frames is not provided by the user, it will be automatically partitioned from the training data. If either frame is missing, 10% of the training data will be used to create the missing frame (if both are missing, then a total of 20% of the training data will be used: a 10% validation frame and a 10% leaderboard frame).

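The automatic partitioning described above can be sketched as follows (a hypothetical helper for illustration; H2O performs this split internally):

.. code-block:: python

    def auto_partition(has_validation, has_leaderboard):
        """Fractions of the training data carved off when nfolds == 0.

        Each missing frame (validation, leaderboard) takes 10% of the
        training data; whatever remains is used for training.
        """
        validation = 0.0 if has_validation else 0.1
        leaderboard = 0.0 if has_leaderboard else 0.1
        return {"train": 1.0 - validation - leaderboard,
                "validation": validation,
                "leaderboard": leaderboard}

    # Both frames missing: 10% validation + 10% leaderboard = 20% carved off.
    auto_partition(has_validation=False, has_leaderboard=False)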
XGBoost memory requirements
'''''''''''''''''''''''''''

XGBoost, which is included in H2O as a third-party library, requires its own memory outside the H2O (Java) cluster. When running AutoML with XGBoost (it is included by default), make sure you allow H2O no more than 2/3 of the total available RAM. Example: If you have 60G RAM, use ``h2o.init(max_mem_size = "40G")``, leaving 20G for XGBoost.

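The two-thirds rule of thumb can be expressed as a small helper (the function is our own illustration, not part of the H2O API):

.. code-block:: python

    def h2o_max_mem_gb(total_ram_gb):
        """Cap the H2O (Java) cluster at two-thirds of total RAM,
        leaving the remaining third for XGBoost's off-heap memory."""
        return int(total_ram_gb * 2 // 3)

    # For a 60G machine this suggests h2o.init(max_mem_size="40G").
    print(f'h2o.init(max_mem_size="{h2o_max_mem_gb(60)}G")')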
Scikit-learn compatibility
''''''''''''''''''''''''''

``H2OAutoML`` can interact with the ``h2o.sklearn`` module. The ``h2o.sklearn`` module exposes two wrappers for ``H2OAutoML`` (``H2OAutoMLClassifier`` and ``H2OAutoMLRegressor``), which expose the standard API familiar to ``sklearn`` users: ``fit``, ``predict``, ``fit_predict``, ``score``, ``get_params``, and ``set_params``. These wrappers accept various input formats (H2OFrame, ``numpy`` array, ``pandas`` DataFrame), which allows them to be combined with pure ``sklearn`` components in pipelines. For an example using ``H2OAutoML`` with the ``h2o.sklearn`` module, click `here <https://github.com/h2oai/h2o-tutorials/blob/master/tutorials/sklearn-integration/H2OAutoML_as_sklearn_estimator.ipynb>`__.

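To illustrate the estimator protocol these wrappers follow, here is a toy estimator with the same method surface (a stand-in for illustration only, not the actual H2O wrapper):

.. code-block:: python

    class ToyMajorityClassifier:
        """Minimal estimator following the sklearn-style protocol the H2O
        wrappers expose: fit / predict / score / get_params / set_params."""

        def __init__(self, default_label=0):
            self.default_label = default_label

        def fit(self, X, y):
            # "Train" by memorizing the most frequent training label.
            self.label_ = max(set(y), key=list(y).count) if len(y) else self.default_label
            return self

        def predict(self, X):
            return [self.label_] * len(X)

        def score(self, X, y):
            preds = self.predict(X)
            return sum(p == t for p, t in zip(preds, y)) / len(y)

        def get_params(self, deep=True):
            return {"default_label": self.default_label}

        def set_params(self, **params):
            for key, value in params.items():
                setattr(self, key, value)
            return self

Because the H2O wrappers follow this same protocol, they can be dropped into ``sklearn`` pipelines alongside pure ``sklearn`` transformers.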
Explainability
--------------

AutoML objects are fully supported through the `H2O Model Explainability <http://docs.h2o.ai/h2o/latest-stable/h2o-docs/explain.html>`__ interface. A large number of multi-model comparison and single-model (AutoML leader) plots can be generated automatically with a single call to ``h2o.explain()``. We invite you to learn more at the page linked above.

Code examples
-------------

Training
~~~~~~~~

Using the previous code example, you can generate test set predictions as follows:

.. code-block:: python

    preds = aml.leader.predict(test)

AutoML output
-------------

Leaderboard
~~~~~~~~~~~

Here is an example of a leaderboard (with all columns) for a binary classification task:

To create a leaderboard with metrics from a new ``leaderboard_frame``, `h2o.make_leaderboard <performance-and-prediction.html#leaderboard>`__ can be used.

Examine models
~~~~~~~~~~~~~~

To examine the trained models more closely, you can interact with them either by model ID or through a convenience function that grabs the best model of each model type (ranked by the default metric, or by a metric of your choosing).

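The "best model of each type" idea can be sketched over leaderboard-like rows (hypothetical data and helper, pure Python):

.. code-block:: python

    def best_per_algo(rows, metric="auc"):
        """Given leaderboard-like rows, keep the highest-scoring model per algorithm."""
        best = {}
        for row in rows:
            algo = row["algo"]
            if algo not in best or row[metric] > best[algo][metric]:
                best[algo] = row
        return best

    leaderboard = [
        {"model_id": "GBM_1", "algo": "GBM", "auc": 0.78},
        {"model_id": "GBM_2", "algo": "GBM", "auc": 0.81},
        {"model_id": "XGBoost_1", "algo": "XGBoost", "auc": 0.80},
    ]
    best_per_algo(leaderboard)["GBM"]["model_id"]  # "GBM_2"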
Once you have retrieved the model in R or Python, you can inspect the model parameters. For example, in Python:

.. code-block:: python

    xgb.params['ntrees']

AutoML log
~~~~~~~~~~

When using Python or R clients, you can also access meta information with the following AutoML object properties:
Below are a few screenshots of the app, though more visualizations are available.

Experimental features
---------------------

Preprocessing
~~~~~~~~~~~~~

Information about how to cite the H2O software in general is covered in the H2O documentation.

We would love to hear how you've used H2O AutoML, so if you have a paper that references it, please let us know by opening an issue or submitting a PR to the `Awesome H2O repo <https://github.com/h2oai/awesome-h2o#research-papers>`__ on GitHub. This is where we keep track of papers that use H2O AutoML, and H2O generally.

Random grid search parameters
-----------------------------

AutoML performs a hyperparameter search over a variety of H2O algorithms in order to deliver the best model. In the table below, we list the hyperparameters, along with all potential values that can be randomly chosen in the search. If these models also have a non-default value set for a hyperparameter, we identify it in the list as well. Random Forest and Extremely Randomized Trees are not grid searched (in the current version of AutoML), so they are not included in the list below.
**Note**: AutoML does not run a standard grid search for GLM (returning all the possible models). Instead, AutoML builds a single model with ``lambda_search`` enabled and passes a list of ``alpha`` values. It returns only the model with the best alpha-lambda combination, rather than one model for each combination.

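The GLM selection behavior can be sketched as choosing the single best (alpha, lambda) pair from the scored combinations (illustrative scores, not real AutoML output):

.. code-block:: python

    def best_glm_config(scores):
        """scores: {(alpha, lambda): validation_error}; lower is better.
        Returns the single best combination, mirroring how AutoML keeps
        only one GLM rather than one model per combination."""
        return min(scores, key=scores.get)

    scores = {
        (0.0, 0.01): 0.31,
        (0.5, 0.001): 0.27,
        (1.0, 0.01): 0.29,
    }
    best_glm_config(scores)  # (0.5, 0.001)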
GLM hyperparameters
~~~~~~~~~~~~~~~~~~~

This table shows the GLM values that are searched over when performing AutoML grid search. Additional information is available `here <https://github.com/h2oai/h2o-3/blob/master/h2o-automl/src/main/java/ai/h2o/automl/modeling/GLMStepsProvider.java>`__.
XGBoost hyperparameters
~~~~~~~~~~~~~~~~~~~~~~~

This table shows the XGBoost values that are searched over when performing AutoML grid search. Additional information is available `here <https://github.com/h2oai/h2o-3/blob/master/h2o-automl/src/main/java/ai/h2o/automl/modeling/XGBoostSteps.java>`__.
GBM hyperparameters
~~~~~~~~~~~~~~~~~~~

This table shows the GBM values that are searched over when performing AutoML grid search. Additional information is available `here <https://github.com/h2oai/h2o-3/blob/master/h2o-automl/src/main/java/ai/h2o/automl/modeling/GBMStepsProvider.java>`__.

Deep Learning hyperparameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This table shows the Deep Learning values that are searched over when performing AutoML grid search. Additional information is available `here <https://github.com/h2oai/h2o-3/blob/master/h2o-automl/src/main/java/ai/h2o/automl/modeling/DeepLearningStepsProvider.java>`__.