@@ -921,53 +921,20 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
921921 provider = JobLibProvider (n_jobs = self .n_jobs )
922922
923923 bagged_intercept = None
924- if n_classes == Native .Task_MonoClassification :
925- bagged_intercept = np .full ((self .outer_bags , 1 ), - np .inf , np .float64 )
926- intercept_correction = None
927- elif objective_code == Native .Objective_Rmse :
928- bagged_intercept = np .empty ((self .outer_bags , 1 ), np .float64 )
929-
930- # RMSE is very special and we can do closed form even with init_scores
931- y_shifted = y if init_score is None else y - init_score
924+ if not is_differential_privacy :
925+ if n_classes == Native .Task_MonoClassification :
926+ bagged_intercept = np .full ((self .outer_bags , 1 ), - np .inf , np .float64 )
927+ intercept_correction = None
928+ elif objective_code == Native .Objective_Rmse :
929+ bagged_intercept = np .empty ((self .outer_bags , 1 ), np .float64 )
932930
933- for idx in range (self .outer_bags ):
934- bag = internal_bags [idx ]
935- sample_weight_local = sample_weight
936- y_local = y_shifted
937- if bag is not None :
938- include_samples = 0 < bag
939- y_local = y_local [include_samples ]
940- if sample_weight_local is None :
941- sample_weight_local = bag [include_samples ]
942- else :
943- sample_weight_local = (
944- sample_weight_local [include_samples ] * bag [include_samples ]
945- )
946-
947- bagged_intercept [idx , :] = np .average (
948- y_local , weights = sample_weight_local
949- )
950-
951- sample_weight_local = sample_weight
952- y_local = y_shifted
953- if visible_samples is not None :
954- y_local = y_local [visible_samples ]
955- if sample_weight_local is not None :
956- sample_weight_local = sample_weight_local [visible_samples ]
957-
958- intercept_correction = np .average (y_local , weights = sample_weight_local )
959- intercept_correction -= bagged_intercept .mean (axis = 0 )
960- elif init_score is None :
961- if (
962- objective_code == Native .Objective_LogLossBinary
963- or objective_code == Native .Objective_LogLossMulticlass
964- ):
965- bagged_intercept = np .empty ((self .outer_bags , n_scores ), np .float64 )
931+ # RMSE is very special and we can do closed form even with init_scores
932+ y_shifted = y if init_score is None else y - init_score
966933
967934 for idx in range (self .outer_bags ):
968935 bag = internal_bags [idx ]
969936 sample_weight_local = sample_weight
970- y_local = y
937+ y_local = y_shifted
971938 if bag is not None :
972939 include_samples = 0 < bag
973940 y_local = y_local [include_samples ]
@@ -979,26 +946,61 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
979946 * bag [include_samples ]
980947 )
981948
982- probs = np .bincount (y_local , weights = sample_weight_local )
983- total = probs .sum ()
984- probs = probs .astype (np .float64 , copy = False )
985- probs /= total
986- bagged_intercept [idx , :] = link_func (probs , link , link_param )
949+ bagged_intercept [idx , :] = np .average (
950+ y_local , weights = sample_weight_local
951+ )
987952
988953 sample_weight_local = sample_weight
989- y_local = y
954+ y_local = y_shifted
990955 if visible_samples is not None :
991956 y_local = y_local [visible_samples ]
992957 if sample_weight_local is not None :
993958 sample_weight_local = sample_weight_local [visible_samples ]
994959
995- probs = np .bincount (y_local , weights = sample_weight_local )
996- total = probs .sum ()
997- probs = probs .astype (np .float64 , copy = False )
998- probs /= total
999-
1000- intercept_correction = link_func (probs , link , link_param )
960+ intercept_correction = np .average (y_local , weights = sample_weight_local )
1001961 intercept_correction -= bagged_intercept .mean (axis = 0 )
962+ elif init_score is None :
963+ if (
964+ objective_code == Native .Objective_LogLossBinary
965+ or objective_code == Native .Objective_LogLossMulticlass
966+ ):
967+ bagged_intercept = np .empty ((self .outer_bags , n_scores ), np .float64 )
968+
969+ for idx in range (self .outer_bags ):
970+ bag = internal_bags [idx ]
971+ sample_weight_local = sample_weight
972+ y_local = y
973+ if bag is not None :
974+ include_samples = 0 < bag
975+ y_local = y_local [include_samples ]
976+ if sample_weight_local is None :
977+ sample_weight_local = bag [include_samples ]
978+ else :
979+ sample_weight_local = (
980+ sample_weight_local [include_samples ]
981+ * bag [include_samples ]
982+ )
983+
984+ probs = np .bincount (y_local , weights = sample_weight_local )
985+ total = probs .sum ()
986+ probs = probs .astype (np .float64 , copy = False )
987+ probs /= total
988+ bagged_intercept [idx , :] = link_func (probs , link , link_param )
989+
990+ sample_weight_local = sample_weight
991+ y_local = y
992+ if visible_samples is not None :
993+ y_local = y_local [visible_samples ]
994+ if sample_weight_local is not None :
995+ sample_weight_local = sample_weight_local [visible_samples ]
996+
997+ probs = np .bincount (y_local , weights = sample_weight_local )
998+ total = probs .sum ()
999+ probs = probs .astype (np .float64 , copy = False )
1000+ probs /= total
1001+
1002+ intercept_correction = link_func (probs , link , link_param )
1003+ intercept_correction -= bagged_intercept .mean (axis = 0 )
10021004
10031005 if bagged_intercept is None :
10041006 # TODO: get the intercept for these non-default options by boosting on the intercept
@@ -1392,79 +1394,82 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
13921394 bagged_intercept , bagged_scores , bin_weights , bag_weights
13931395 )
13941396
1395- if objective_code == Native .Objective_Rmse :
1396- scores = ebm_predict_scores (
1397- X ,
1398- n_samples ,
1399- feature_names_in ,
1400- feature_types_in ,
1401- bins ,
1402- intercept ,
1403- term_scores ,
1404- term_features ,
1405- init_score ,
1406- )
1397+ if not is_differential_privacy :
1398+ if objective_code == Native .Objective_Rmse :
1399+ scores = ebm_predict_scores (
1400+ X ,
1401+ n_samples ,
1402+ feature_names_in ,
1403+ feature_types_in ,
1404+ bins ,
1405+ intercept ,
1406+ term_scores ,
1407+ term_features ,
1408+ init_score ,
1409+ )
14071410
1408- sample_weight_local = sample_weight
1409- y_local = y
1410- if visible_samples is not None :
1411- scores = scores [visible_samples ]
1412- y_local = y_local [visible_samples ]
1413- if sample_weight_local is not None :
1414- sample_weight_local = sample_weight_local [visible_samples ]
1415-
1416- correction = np .average (y_local - scores , weights = sample_weight_local )
1417- intercept += correction
1418- bagged_intercept += correction
1419- elif (
1420- objective_code == Native .Objective_LogLossBinary
1421- or objective_code == Native .Objective_LogLossMulticlass
1422- ):
1423- scores = ebm_predict_scores (
1424- X ,
1425- n_samples ,
1426- feature_names_in ,
1427- feature_types_in ,
1428- bins ,
1429- intercept ,
1430- term_scores ,
1431- term_features ,
1432- init_score ,
1433- )
1411+ sample_weight_local = sample_weight
1412+ y_local = y
1413+ if visible_samples is not None :
1414+ scores = scores [visible_samples ]
1415+ y_local = y_local [visible_samples ]
1416+ if sample_weight_local is not None :
1417+ sample_weight_local = sample_weight_local [visible_samples ]
14341418
1435- sample_weight_local = sample_weight
1436- y_local = y
1437- if visible_samples is not None :
1438- scores = scores [visible_samples ]
1439- y_local = y_local [visible_samples ]
1440- if sample_weight_local is not None :
1441- sample_weight_local = sample_weight_local [visible_samples ]
1442-
1443- probs = np .bincount (y_local , weights = sample_weight_local )
1444- total = probs .sum ()
1445- probs = probs .astype (np .float64 , copy = False )
1446- probs /= total
1447- actual_scores = link_func (probs , link , link_param )
1448-
1449- n_correction_iterations = 25
1450- for _ in range (n_correction_iterations ):
1451- pred_prob = inv_link (scores , link , link_param )
1452- pred_prob = np .average (pred_prob , axis = 0 , weights = sample_weight_local )
1453- pred_scores = link_func (pred_prob , link , link_param )
1454- correction = actual_scores - pred_scores
1419+ correction = np .average (y_local - scores , weights = sample_weight_local )
14551420 intercept += correction
14561421 bagged_intercept += correction
1457- scores += correction
1458-
1459- if bagged_intercept .ndim == 2 :
1460- # multiclass
1461- # pick the class that we're going to zero
1462- zero_index = np .argmax (intercept )
1463- intercept -= intercept [zero_index ]
1464- bagged_intercept -= np .expand_dims (
1465- bagged_intercept [..., zero_index ], - 1
1422+ elif (
1423+ objective_code == Native .Objective_LogLossBinary
1424+ or objective_code == Native .Objective_LogLossMulticlass
1425+ ):
1426+ scores = ebm_predict_scores (
1427+ X ,
1428+ n_samples ,
1429+ feature_names_in ,
1430+ feature_types_in ,
1431+ bins ,
1432+ intercept ,
1433+ term_scores ,
1434+ term_features ,
1435+ init_score ,
14661436 )
14671437
1438+ sample_weight_local = sample_weight
1439+ y_local = y
1440+ if visible_samples is not None :
1441+ scores = scores [visible_samples ]
1442+ y_local = y_local [visible_samples ]
1443+ if sample_weight_local is not None :
1444+ sample_weight_local = sample_weight_local [visible_samples ]
1445+
1446+ probs = np .bincount (y_local , weights = sample_weight_local )
1447+ total = probs .sum ()
1448+ probs = probs .astype (np .float64 , copy = False )
1449+ probs /= total
1450+ actual_scores = link_func (probs , link , link_param )
1451+
1452+ n_correction_iterations = 25
1453+ for _ in range (n_correction_iterations ):
1454+ pred_prob = inv_link (scores , link , link_param )
1455+ pred_prob = np .average (
1456+ pred_prob , axis = 0 , weights = sample_weight_local
1457+ )
1458+ pred_scores = link_func (pred_prob , link , link_param )
1459+ correction = actual_scores - pred_scores
1460+ intercept += correction
1461+ bagged_intercept += correction
1462+ scores += correction
1463+
1464+ if bagged_intercept .ndim == 2 :
1465+ # multiclass
1466+ # pick the class that we're going to zero
1467+ zero_index = np .argmax (intercept )
1468+ intercept -= intercept [zero_index ]
1469+ bagged_intercept -= np .expand_dims (
1470+ bagged_intercept [..., zero_index ], - 1
1471+ )
1472+
14681473 if n_classes < Native .Task_GeneralClassification :
14691474 # scikit-learn requires intercept to be float for RegressorMixin, not numpy
14701475 intercept = float (intercept [0 ])
0 commit comments