Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/LightGBM/cuda/cuda_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class CUDATree : public Tree {

/*! \brief Accessor that hands back the pointer held in cuda_leaf_value_. */
double* cuda_leaf_value_ref() const {
  return cuda_leaf_value_;
}

int host_leaf_depth(int leaf_index) {
int host_leaf_depth(int leaf_index) {
if (leaf_index >= 0 && leaf_index < num_leaves_) {
return host_leaf_depth_[leaf_index];
} else {
Expand Down
6 changes: 3 additions & 3 deletions src/treelearner/cuda/cuda_best_split_finder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,18 +331,18 @@ void CUDABestSplitFinder::FindBestSplitsForLeaf(
const data_size_t num_data_in_larger_leaf,
const double sum_hessians_in_smaller_leaf,
const double sum_hessians_in_larger_leaf,
const int small_leaf_depth,
const int smaller_leaf_depth,
const int larger_leaf_depth,
const score_t* grad_scale,
const score_t* hess_scale,
const uint8_t smaller_num_bits_in_histogram_bins,
const uint8_t larger_num_bits_in_histogram_bins) {
const bool is_smaller_leaf_valid = (num_data_in_smaller_leaf > min_data_in_leaf_ &&
sum_hessians_in_smaller_leaf > min_sum_hessian_in_leaf_ &&
(max_depth > 0 && smaller_leaf_depth > 0 && smaller_leaf_depth < max_depth));
((max_depth_ > 0 && smaller_leaf_depth > 0 && smaller_leaf_depth <= max_depth_) || (max_depth_ <= 0)));
const bool is_larger_leaf_valid = (num_data_in_larger_leaf > min_data_in_leaf_ &&
sum_hessians_in_larger_leaf > min_sum_hessian_in_leaf_ && larger_leaf_index >= 0 &&
(max_depth > 0 && larger_leaf_depth > 0 && larger_leaf_depth < max_depth));
((max_depth_ > 0 && larger_leaf_depth > 0 && larger_leaf_depth <= max_depth_) || (max_depth_ <= 0)));
if (grad_scale != nullptr && hess_scale != nullptr) {
LaunchFindBestSplitsDiscretizedForLeafKernel(smaller_leaf_splits, larger_leaf_splits,
smaller_leaf_index, larger_leaf_index, is_smaller_leaf_valid, is_larger_leaf_valid,
Expand Down
2 changes: 2 additions & 0 deletions src/treelearner/cuda/cuda_best_split_finder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class CUDABestSplitFinder {
const data_size_t num_data_in_larger_leaf,
const double sum_hessians_in_smaller_leaf,
const double sum_hessians_in_larger_leaf,
const int smaller_leaf_depth,
const int larger_leaf_depth,
const score_t* grad_scale,
const score_t* hess_scale,
const uint8_t smaller_num_bits_in_histogram_bins,
Expand Down
35 changes: 35 additions & 0 deletions tests/python_package_test/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,41 @@ def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Datas
assert ds.construct().feature_name == ["Column_0", "Column_1", "Column_2"]


def test_max_depth_is_enforced(capsys):
params = {
"objective": "binary",
"min_data": 10,
"num_leaves": 15,
"verbose": -1,
"num_threads": 1,
"max_bin": 255,
"gpu_use_dp": True,
"deterministic": True,
"random_state": 2,
}
X, y = make_blobs(n_samples=1_000, n_features=1, centers=2, random_state=2)
model = lgb.LGBMRegressor(**params)
model.fit(X, y)
fitted_max_depth = (
model.booster_.trees_to_dataframe().groupby("tree_index")["node_depth"].max().value_counts().index.max()
)
assert fitted_max_depth == 9, (
"This data generation and model fitting procedure should be deterministic within backends. "
"Both cpu and cuda should result in models with maximal tree depth 9."
)
# set a constraining value of max_depth, i.e. lower than 9
constrained_model = lgb.LGBMRegressor(max_depth=6, **params)
constrained_model.fit(X, y)
assert (
constrained_model.booster_.trees_to_dataframe()
.groupby("tree_index")["node_depth"]
.max()
.value_counts()
.index.max()
<= 7
), "Trained model contains trees deeper than max_depth = 6"


# NOTE: this intentionally contains values where num_leaves <, ==, and > (max_depth^2)
@pytest.mark.parametrize(("max_depth", "num_leaves"), [(-1, 3), (-1, 50), (5, 3), (5, 31), (5, 32), (8, 3), (8, 31)])
def test_max_depth_warning_is_not_raised_if_num_leaves_is_also_provided(capsys, num_leaves, max_depth):
Expand Down