@@ -370,6 +370,8 @@ class RayParams:
             Defaults to 0 (no retries). Set to -1 for unlimited retries.
         checkpoint_frequency (int): How often to save checkpoints. Defaults
             to ``5`` (every 5th iteration).
+        verbose (bool): Whether to output Ray-specific info messages
+            during training/prediction.
     """
     # Actor scheduling
     num_actors: int = 0
@@ -386,6 +388,8 @@ class RayParams:
     # Distributed callbacks
     distributed_callbacks: Optional[List[DistributedCallback]] = None
 
+    verbose: Optional[bool] = None
+
     def get_tune_resources(self):
         """Return the resources to use for xgboost_ray training with Tune."""
         if self.cpus_per_actor <= 0 or self.num_actors <= 0:
@@ -426,6 +430,9 @@ def _validate_ray_params(ray_params: Union[None, RayParams, dict]) \
         warnings.warn(
             f"`num_actors` in `ray_params` is smaller than 2 "
             f"({ray_params.num_actors}). XGBoost will NOT be distributed!")
+    if ray_params.verbose is None:
+        # In Tune sessions, reduce verbosity
+        ray_params.verbose = not is_session_enabled()
     return ray_params
 
 
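The default resolution above treats an unset `verbose` as "on" for plain training runs and "off" inside Ray Tune trials, where Tune already manages per-trial output. A minimal sketch of that rule, assuming `is_session_enabled()` reports whether a Tune session is active in the current process (`resolve_verbose` is a hypothetical helper for illustration only):

```python
# Hypothetical helper mirroring the default above: explicit settings win,
# and None means "chatty unless running inside a Ray Tune session".
def resolve_verbose(verbose, is_session_enabled):
    if verbose is None:
        return not is_session_enabled()
    return verbose

assert resolve_verbose(None, lambda: False) is True    # plain run: info logs
assert resolve_verbose(None, lambda: True) is False    # Tune trial: quiet
assert resolve_verbose(False, lambda: False) is False  # explicit wins
```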
@@ -930,6 +937,9 @@ def _train(params: Dict,
     from xgboost_ray.elastic import _maybe_schedule_new_actors, \
         _update_scheduled_actor_states, _get_actor_alive_status
 
+    # Do not modify original parameters
+    params = params.copy()
+
     # Un-schedule possible scheduled restarts
     _training_state.restart_training_at = None
 
@@ -944,6 +954,13 @@ def _train(params: Dict,
         params["nthread"] = cpus_per_actor
         params["n_jobs"] = cpus_per_actor
 
+    if ray_params.verbose:
+        maybe_log = logger.info
+        params.setdefault("verbosity", 1)
+    else:
+        maybe_log = logger.debug
+        params.setdefault("verbosity", 0)
+
     # This is a callback that handles actor failures.
     # We identify the rank of the failed actor, add this to a set of
     # failed actors (which we might want to restart later), and set its
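Besides switching the logger method, the hunk above also seeds XGBoost's own `verbosity` parameter (0 = silent, 1 = warning, 2 = info, 3 = debug). Because it uses `setdefault`, a caller-supplied `verbosity` is never overridden; a small illustration:

```python
# setdefault() only fills in `verbosity` when the caller left it unset.
params = {"objective": "binary:logistic", "verbosity": 2}
params.setdefault("verbosity", 0)
assert params["verbosity"] == 2  # user's choice is preserved

params = {"objective": "binary:logistic"}
params.setdefault("verbosity", 0)
assert params["verbosity"] == 0  # quiet default applied
```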
@@ -979,9 +996,10 @@ def handle_actor_failure(actor_id):
             newly_created += 1
 
     alive_actors = sum(1 for a in _training_state.actors if a is not None)
-    logger.info(f"[RayXGBoost] Created {newly_created} new actors "
-                f"({alive_actors} total actors). Waiting until actors "
-                f"are ready for training.")
+
+    maybe_log(f"[RayXGBoost] Created {newly_created} new actors "
+              f"({alive_actors} total actors). Waiting until actors "
+              f"are ready for training.")
 
     # For distributed datasets (e.g. Modin), this will initialize
     # (and fix) the assignment of data shards to actor ranks
@@ -1024,7 +1042,7 @@ def handle_actor_failure(actor_id):
         _get_actor_alive_status(_training_state.actors, handle_actor_failure)
         raise RayActorError from exc
 
-    logger.info("[RayXGBoost] Starting XGBoost training.")
+    maybe_log("[RayXGBoost] Starting XGBoost training.")
 
     # Start Rabit tracker for gradient sharing
     rabit_process, env = _start_rabit_tracker(alive_actors)
@@ -1515,10 +1533,15 @@ def _wrapped(*args, **kwargs):
     train_additional_results["training_time_s"] = total_training_time
     train_additional_results["total_time_s"] = total_time
 
-    logger.info("[RayXGBoost] Finished XGBoost training on training data "
-                "with total N={total_n:,} in {total_time_s:.2f} seconds "
-                "({training_time_s:.2f} pure XGBoost training time).".format(
-                    **train_additional_results))
+    if ray_params.verbose:
+        maybe_log = logger.info
+    else:
+        maybe_log = logger.debug
+
+    maybe_log("[RayXGBoost] Finished XGBoost training on training data "
+              "with total N={total_n:,} in {total_time_s:.2f} seconds "
+              "({training_time_s:.2f} pure XGBoost training time).".format(
+                  **train_additional_results))
 
     _shutdown(
         actors=actors,
@@ -1540,6 +1563,11 @@ def _predict(model: xgb.Booster, data: RayDMatrix, ray_params: RayParams,
15401563 ** kwargs ):
15411564 _assert_ray_support ()
15421565
1566+ if ray_params .verbose :
1567+ maybe_log = logger .info
1568+ else :
1569+ maybe_log = logger .debug
1570+
15431571 if not ray .is_initialized ():
15441572 ray .init ()
15451573
@@ -1555,7 +1583,7 @@ def _predict(model: xgb.Booster, data: RayDMatrix, ray_params: RayParams,
             distributed_callbacks=ray_params.distributed_callbacks)
         for i in range(ray_params.num_actors)
     ]
-    logger.info(f"[RayXGBoost] Created {len(actors)} remote actors.")
+    maybe_log(f"[RayXGBoost] Created {len(actors)} remote actors.")
 
     # Split data across workers
     wait_load = []
@@ -1572,7 +1600,7 @@ def _predict(model: xgb.Booster, data: RayDMatrix, ray_params: RayParams,
     # Put model into object store
     model_ref = ray.put(model)
 
-    logger.info("[RayXGBoost] Starting XGBoost prediction.")
+    maybe_log("[RayXGBoost] Starting XGBoost prediction.")
 
     # Train
     fut = [actor.predict.remote(model_ref, data, **kwargs) for actor in actors]
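Taken together, the new flag is set through the public `RayParams` dataclass. A minimal usage sketch, assuming the `train()`/`RayDMatrix` API shown in the xgboost_ray README (dataset and parameter values are illustrative only):

```python
from sklearn.datasets import load_breast_cancer
from xgboost_ray import RayDMatrix, RayParams, train

train_x, train_y = load_breast_cancer(return_X_y=True)
dtrain = RayDMatrix(train_x, train_y)

# verbose=False downgrades the "[RayXGBoost] ..." messages to debug level;
# leaving it at None lets _validate_ray_params() choose a default based on
# whether a Ray Tune session is active.
bst = train(
    {"objective": "binary:logistic", "eval_metric": "logloss"},
    dtrain,
    num_boost_round=10,
    ray_params=RayParams(num_actors=2, cpus_per_actor=1, verbose=False))
```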