@@ -186,9 +186,9 @@ class HonestTreeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseDecisionTree
186186 classes). If "empirical", the prior tree posterior is the relative
187187 class frequency in the voting subsample.
188188
189- stratify : bool
189+ stratify : bool, default=True
190190 Whether or not to stratify samples when considering structure and leaf indices.
191- By default False .
191+ By default True .
192192
193193 honest_method : {"apply", "prune"}, default="apply"
194194 Method to use for fitting the leaf nodes. If "apply", the leaf nodes
@@ -197,6 +197,12 @@ class frequency in the voting subsample.
197197 by pruning using the honest-set of data after the tree structure is built
198198 using the structure-set of data.
199199
200+ kernel_method : bool, default=False
201+ Method for normalizing ``predict_proba`` posteriors by the number of
202+ samples in the leaf nodes across the forest. Not applicable to single
203+ honest trees.
204+ By default False.
205+
200206 **tree_estimator_params : dict
201207 Parameters to pass to the underlying base tree estimators.
202208 These must be parameters for ``tree_estimator``.
@@ -338,8 +344,9 @@ def __init__(
338344 monotonic_cst = None ,
339345 honest_fraction = 0.5 ,
340346 honest_prior = "empirical" ,
341- stratify = False ,
347+ stratify = True ,
342348 honest_method = "apply" ,
349+ kernel_method = False ,
343350 ** tree_estimator_params ,
344351 ):
345352 self .tree_estimator = tree_estimator
@@ -361,6 +368,7 @@ def __init__(
361368 self .honest_prior = honest_prior
362369 self .stratify = stratify
363370 self .honest_method = honest_method
371+ self .kernel_method = kernel_method
364372
365373 # XXX: to enable this, we need to also reset the leaf node samples during `_set_leaf_nodes`
366374 self .store_leaf_values = False
@@ -876,9 +884,11 @@ class in a leaf.
876884
877885 if self .n_outputs_ == 1 :
878886 proba = proba [:, : self ._tree_n_classes_ ]
879- # normalizer = proba.sum(axis=1)[:, np.newaxis]
880- # normalizer[normalizer == 0.0] = 1.0
881- # proba /= normalizer
887+
888+ if not self .kernel_method :
889+ normalizer = proba .sum (axis = 1 )[:, np .newaxis ]
890+ normalizer [normalizer == 0.0 ] = 1.0
891+ proba /= normalizer
882892 proba = self ._empty_leaf_correction (proba )
883893
884894 return proba
@@ -888,10 +898,13 @@ class in a leaf.
888898
889899 for k in range (self .n_outputs_ ):
890900 proba_k = proba [:, k , : self ._tree_n_classes_ [k ]]
891- normalizer = proba_k .sum (axis = 1 )[:, np .newaxis ]
892- # normalizer[normalizer == 0.0] = 1.0
893- # proba_k /= normalizer
894- # proba_k = self._empty_leaf_correction(proba_k, k)
901+
902+ if not self .kernel_method :
903+ normalizer = proba_k .sum (axis = 1 )[:, np .newaxis ]
904+ normalizer [normalizer == 0.0 ] = 1.0
905+ proba_k /= normalizer
906+ proba_k = self ._empty_leaf_correction (proba_k , k )
907+
895908 all_proba .append (proba_k )
896909
897910 return all_proba
0 commit comments