@@ -481,14 +481,22 @@ def collect(
                 # Episode Done
                 # ==============================================================
                 if episode_timestep.done:
-                    self._logger.info(f'======== Env {env_id} episode finished! ========')
                     self._total_episode_count += 1
                     # Logging
                     info_log = {
                         'reward': episode_timestep.info['eval_episode_return'],
                         'time': self._env_info[env_id]['time'],
                         'step': self._env_info[env_id]['step'],
                         'llm_prior_entropy': sum(llm_prior_entropy[env_id]) / len(llm_prior_entropy[env_id])}
+
+                    # Structured episode completion log
+                    self._logger.info(
+                        f"[Episode Complete] Env={env_id} | "
+                        f"Reward={info_log['reward']:.2f} | "
+                        f"Steps={info_log['step']} | "
+                        f"Time={info_log['time']:.2f}s | "
+                        f"LLM_Entropy={info_log['llm_prior_entropy']:.3f}"
+                    )
                     if not collect_with_pure_policy:
                         info_log['visit_entropy'] = (
                             visit_entropies_lst[env_id] / eps_steps_lst[env_id]
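Note (not part of the diff): a minimal Python sketch of what the new episode-completion line renders as, using dummy values for env_id and info_log.

    env_id = 3  # dummy value
    info_log = {'reward': 21.0, 'step': 812, 'time': 14.52, 'llm_prior_entropy': 1.203}  # dummy values
    print(
        f"[Episode Complete] Env={env_id} | "
        f"Reward={info_log['reward']:.2f} | "
        f"Steps={info_log['step']} | "
        f"Time={info_log['time']:.2f}s | "
        f"LLM_Entropy={info_log['llm_prior_entropy']:.3f}"
    )
    # -> [Episode Complete] Env=3 | Reward=21.00 | Steps=812 | Time=14.52s | LLM_Entropy=1.203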
@@ -540,8 +548,7 @@ def collect(
             # ==================================================================
             if len(self.game_segment_pool) >= self._default_num_segments:
                 self._logger.info(
-                    f'✓ Collected {len(self.game_segment_pool)} segments '
-                    f'(target: {self._default_num_segments})'
+                    f"[Collection Complete] Segments={len(self.game_segment_pool)}/{self._default_num_segments}"
                 )

                 # Format return data
@@ -565,13 +572,17 @@ def collect(
         collected_duration = sum([d['time'] for d in self._episode_info])

         if self._world_size > 1:
-            # Before allreduce
-            self._logger.info(f"Rank {self._rank} before allreduce: collected_step={collected_step}, collected_episode={collected_episode}")
+            # Aggregate data across ranks
+            local_step, local_episode = collected_step, collected_episode
             collected_step = allreduce_data(collected_step, 'sum')
             collected_episode = allreduce_data(collected_episode, 'sum')
             collected_duration = allreduce_data(collected_duration, 'sum')
-            # After allreduce
-            self._logger.info(f"Rank {self._rank} after allreduce: collected_step={collected_step}, collected_episode={collected_episode}")
+
+            self._logger.info(
+                f"[Rank {self._rank} Aggregation] "
+                f"Local: steps={local_step}, episodes={local_episode} | "
+                f"Global: steps={collected_step}, episodes={collected_episode}"
+            )


         self._total_envstep_count += collected_step
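Note (not part of the diff): a minimal sketch of the behavior the allreduce_data helper is relied on for here, assuming a torch.distributed process group is already initialized. The function name and body below are illustrative, not the helper's actual implementation.

    import torch
    import torch.distributed as dist

    def allreduce_data_sketch(value: float, op: str = 'sum') -> float:
        # Sum a local scalar across all ranks; every rank receives the global total.
        assert op == 'sum', "only 'sum' is sketched here"
        tensor = torch.tensor([float(value)])
        dist.all_reduce(tensor, op=dist.ReduceOp.SUM)  # in-place collective op
        return tensor.item()

Saving the pre-reduce values into local_step and local_episode first is what lets one log line show both the per-rank and the global counts.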
@@ -625,9 +636,26 @@ def _output_log(self, train_iter: int) -> None:
             info['completed_value_mean'] = np.mean(completed_value)

         self._episode_info.clear()
-
-        # Log to console
-        self._logger.info("Collector Training Summary:\n{}".format('\n'.join([f'{k}: {v}' for k, v in info.items()])))
+
+        # Structured summary log
+        self._logger.info(
+            f"\n{'=' * 80}\n"
+            f"[Collector Summary] Train Iter: {train_iter}\n"
+            f"{'-' * 80}\n"
+            f"Episodes: {info['episode_count']} (Total: {info['total_episode_count']})\n"
+            f"Steps: {info['envstep_count']} (Total: {info['total_envstep_count']})\n"
+            f"Avg Steps/Ep: {info['avg_envstep_per_episode']:.1f}\n"
+            f"Throughput: {info['avg_envstep_per_sec']:.2f} steps/s, {info['avg_episode_per_sec']:.3f} eps/s\n"
+            f"Duration: {info['collect_time']:.2f}s (Total: {info['total_duration']:.2f}s)\n"
+            f"{'-' * 80}\n"
+            f"Reward: mean={info['reward_mean']:.2f}, std={info['reward_std']:.2f}, "
+            f"min={info['reward_min']:.2f}, max={info['reward_max']:.2f}\n"
+            f"LLM Entropy: mean={info['llm_prior_entropy_mean']:.3f}, "
+            f"min={info['llm_prior_entropy_min']:.3f}, max={info['llm_prior_entropy_max']:.3f}\n"
+            + (f"Visit Entropy: {info.get('visit_entropy_mean', 0):.3f}\n" if not self.collect_with_pure_policy else "")
+            + (f"Completed Val: {info.get('completed_value_mean', 0):.3f}\n" if self.policy_config.gumbel_algo else "")
+            + f"{'=' * 80}"
+        )
631659
632660 # Log to TensorBoard and WandB
633661 for k , v in info .items ():
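Note (not part of the diff): the summary above is built from adjacent f-strings, which Python concatenates implicitly, plus explicit `+ (... if cond else "")` terms for the optional sections. A standalone sketch of that pattern with dummy values:

    pure_policy = True   # dummy flags for illustration
    gumbel_algo = False
    summary = (
        f"{'=' * 20}\n"
        f"Reward: mean={1.50:.2f}\n"
        + (f"Visit Entropy: {0.42:.3f}\n" if not pure_policy else "")
        + (f"Completed Val: {0.90:.3f}\n" if gumbel_algo else "")
        + f"{'=' * 20}"
    )
    print(summary)  # optional lines vanish entirely when their flag is off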