diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index 8018dd92efd2..457dd07b3423 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -239,8 +239,9 @@ def __call__(self, env: CallbackEnv) -> None:
                     "Only list and callable values are supported "
                     "as a mapping from boosting round index to new parameter value."
                 )
-            if new_param != env.params.get(key, None):
-                new_parameters[key] = new_param
+            # Always update parameters to ensure compatibility with sklearn interface
+            # The booster's reset_parameter method handles whether the parameter actually changes
+            new_parameters[key] = new_param
         if new_parameters:
             if isinstance(env.model, Booster):
                 env.model.reset_parameter(new_parameters)
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 58988eeb6e2f..6dd8ac3b7567 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -975,6 +975,29 @@ def fit(
             else:
                 sample_weight = np.multiply(sample_weight, class_sample_weight)
 
+        # Filter out booster-specific parameters from dataset parameters
+        from .basic import _ConfigAliases
+
+        # Get all booster parameter names to exclude from dataset
+        booster_params = set()
+        # Common booster parameters that should not be in dataset
+        booster_param_names = [
+            'boosting', 'learning_rate', 'num_leaves', 'max_depth', 'min_split_gain',
+            'min_child_weight', 'min_child_samples', 'subsample', 'subsample_freq',
+            'colsample_bytree', 'colsample_bynode', 'colsample_bylevel', 'reg_alpha',
+            'reg_lambda', 'random_state', 'n_estimators', 'num_iterations',
+            'feature_fraction', 'bagging_fraction', 'feature_fraction_bynode',
+            'bagging_freq', 'lambda_l1', 'lambda_l2', 'min_gain_to_split',
+            'min_data_in_leaf', 'min_sum_hessian_in_leaf', 'max_delta_step',
+            'max_bin', 'max_depth', 'min_data_in_bin', 'bin_construct_sample_cnt'
+        ]
+
+        for param_name in booster_param_names:
+            booster_params.update(_ConfigAliases.get_sorted(param_name))
+
+        # Filter dataset parameters
+        dataset_params = {k: v for k, v in params.items() if k not in booster_params}
+
         train_set = Dataset(
             data=_X,
             label=_y,
@@ -983,7 +1006,7 @@
             init_score=init_score,
             categorical_feature=categorical_feature,
             feature_name=feature_name,
-            params=params,
+            params=dataset_params,
         )
 
         valid_sets: List[Dataset] = []
@@ -1023,6 +1046,28 @@
                         name="eval_group",
                         i=i,
                     )
+                    # Filter out booster-specific parameters from dataset parameters
+                    from .basic import _ConfigAliases
+
+                    # Get all booster parameter names to exclude from dataset
+                    booster_params = set()
+                    booster_param_names = [
+                        'boosting', 'learning_rate', 'num_leaves', 'max_depth', 'min_split_gain',
+                        'min_child_weight', 'min_child_samples', 'subsample', 'subsample_freq',
+                        'colsample_bytree', 'colsample_bynode', 'colsample_bylevel', 'reg_alpha',
+                        'reg_lambda', 'random_state', 'n_estimators', 'num_iterations',
+                        'feature_fraction', 'bagging_fraction', 'feature_fraction_bynode',
+                        'bagging_freq', 'lambda_l1', 'lambda_l2', 'min_gain_to_split',
+                        'min_data_in_leaf', 'min_sum_hessian_in_leaf', 'max_delta_step',
+                        'max_bin', 'max_depth', 'min_data_in_bin', 'bin_construct_sample_cnt'
+                    ]
+
+                    for param_name in booster_param_names:
+                        booster_params.update(_ConfigAliases.get_sorted(param_name))
+
+                    # Filter dataset parameters
+                    valid_dataset_params = {k: v for k, v in params.items() if k not in booster_params}
+
                     valid_set = Dataset(
                         data=valid_data[0],
                         label=valid_data[1],
@@ -1030,7 +1075,7 @@
                         group=valid_group,
                         init_score=valid_init_score,
                         categorical_feature="auto",
-                        params=params,
+                        params=valid_dataset_params,
                     )
                 valid_sets.append(valid_set)
 
diff --git a/tests/python_package_test/test_callback.py b/tests/python_package_test/test_callback.py
index 48c7a29e8705..6bc9d762e5ae 100644
--- a/tests/python_package_test/test_callback.py
+++ b/tests/python_package_test/test_callback.py
@@ -63,3 +63,29 @@ def test_reset_parameter_callback_is_picklable(serializer):
     assert callback_from_disk.before_iteration is True
     assert callback.kwargs == callback_from_disk.kwargs
     assert callback.kwargs == params
+
+
+def test_reset_parameter_callback_with_sklearn():
+    """Test that reset_parameter callback works with LGBMClassifier."""
+    import numpy as np
+    import lightgbm as lgb
+    from lightgbm import LGBMClassifier
+    from sklearn.datasets import make_classification
+
+    X, y = make_classification(n_samples=1000, n_features=10, random_state=42)
+
+    model = LGBMClassifier(
+        n_estimators=10,
+        colsample_bytree=0.9,  # Start high
+        callbacks=[lgb.reset_parameter(colsample_bytree=[0.3, 0.8, 0.3, 0.8, 0.3, 0.8, 0.3, 0.8, 0.3, 0.8])],
+        verbose=-1
+    )
+    model.fit(X, y)
+
+    trees_df = model.booster_.trees_to_dataframe()
+    unique_feature_counts = trees_df.groupby('tree_index')['split_feature'].nunique()
+
+    assert unique_feature_counts.nunique() > 1, (
+        f"reset_parameter callback did not work with LGBMClassifier. "
+        f"All trees used the same number of features. Counts: {unique_feature_counts.unique()}"
+    )
\ No newline at end of file