Skip to content
This repository was archived by the owner on Jan 12, 2026. It is now read-only.

Commit d5bca66

Browse files
authored
Don't autodetect resources when using with Tune (#60)
* test * no autodetect for tune * updates
1 parent 93ff047 commit d5bca66

File tree

1 file changed

+17
-9
lines changed

1 file changed

+17
-9
lines changed

xgboost_ray/main.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,9 @@ class RayParams:

     def get_tune_resources(self):
         """Return the resources to use for xgboost_ray training with Tune."""
+        if self.cpus_per_actor <= 0 or self.num_actors <= 0:
+            raise ValueError("num_actors and cpus_per_actor both must be "
+                             "greater than 0.")
         return _get_tune_resources(
             num_actors=self.num_actors,
             cpus_per_actor=self.cpus_per_actor,
@@ -1036,22 +1039,27 @@ def _wrapped(*args, **kwargs):
                 "`dtrain = RayDMatrix(data=data, label=label)`.".format(
                     type(dtrain)))

-    cpus_per_actor, gpus_per_actor = _autodetect_resources(
-        ray_params=ray_params,
-        use_tree_method="tree_method" in params
-        and params["tree_method"].startswith("gpu"))
-
-    if gpus_per_actor == 0 and cpus_per_actor == 0:
-        raise ValueError("cpus_per_actor and gpus_per_actor both cannot be "
-                         "0. Are you sure your cluster has CPUs available?")
-
     added_tune_callback = _try_add_tune_callback(kwargs)
     # Tune currently does not support elastic training.
     if added_tune_callback and ray_params.elastic_training:
         raise ValueError("Elastic Training cannot be used with Ray Tune. "
                          "Please disable elastic_training in RayParams in "
                          "order to use xgboost_ray with Tune.")

+    if added_tune_callback:
+        # Don't autodetect resources when used with Tune.
+        cpus_per_actor = ray_params.cpus_per_actor
+        gpus_per_actor = max(0, ray_params.gpus_per_actor)
+    else:
+        cpus_per_actor, gpus_per_actor = _autodetect_resources(
+            ray_params=ray_params,
+            use_tree_method="tree_method" in params
+            and params["tree_method"].startswith("gpu"))
+
+    if gpus_per_actor == 0 and cpus_per_actor == 0:
+        raise ValueError("cpus_per_actor and gpus_per_actor both cannot be "
+                         "0. Are you sure your cluster has CPUs available?")
+
     if ray_params.elastic_training and ray_params.max_failed_actors == 0:
         raise ValueError(
             "Elastic training enabled but the maximum number of failed "

0 commit comments

Comments (0)