@@ -57,27 +57,33 @@ def emit(self, record: logging.LogRecord):
5757 sys .stderr .write (f"Log handler error: { record .getMessage ()} \n " )
5858 sys .stderr .flush ()
5959
def _get_backup_files(self):
    """Return this rank/process's backup log files, oldest first.

    A backup file is any entry of ``self.file_path`` whose name starts with
    ``rank_<rank>_<proc_name>.msg.`` followed by at least one digit (the
    millisecond timestamp appended by ``_log_file_namer``).  ``<rank>`` is
    ``"unknown"`` when ``self.rank_id`` is ``None``.

    Returns:
        list[str]: Bare file names (not joined with ``self.file_path``),
        ordered by ascending numeric timestamp.  Empty when the directory
        does not exist.
    """
    rank_str = str(self.rank_id) if self.rank_id is not None else "unknown"
    file_prefix = f"rank_{rank_str}_{self.proc_name}.msg."
    # Escape the prefix: its dots (and anything unusual in proc_name) must
    # match literally, otherwise a name such as "rank_0_wXmsgY5" qualifies.
    pattern = re.compile(re.escape(file_prefix) + r"(\d+)")
    try:
        entries = os.listdir(self.file_path)
    except FileNotFoundError:
        # No log directory yet means there is nothing to reuse or clean up.
        return []
    stamped = []
    for filename in entries:
        match = pattern.match(filename)
        if match:
            stamped.append((int(match.group(1)), filename))
    # Sort on the numeric timestamp so "oldest first" holds even when the
    # timestamps do not all have the same number of digits.
    stamped.sort()
    return [filename for _, filename in stamped]
71+
def _log_file_namer(self):
    """Pick the file name the handler should log to.

    If no file name has been chosen yet (``self.fname`` is ``None``) and
    backup files already exist for this rank/process, the first entry
    returned by ``_get_backup_files`` is reused.  Otherwise a new name of
    the form ``rank_<rank>_<proc_name>.msg.<ms-timestamp>`` is generated,
    where ``<rank>`` is ``"unknown"`` when ``self.rank_id`` is ``None``.

    Returns:
        str: A bare file name (no directory component).
    """
    existing = self._get_backup_files()
    # NOTE(review): this reuses the FIRST listed backup, which is the oldest
    # one if the list is sorted ascending -- confirm that is the intent.
    if existing and self.fname is None:
        return existing[0]

    if self.rank_id is None:
        rank_label = "unknown"
    else:
        rank_label = str(self.rank_id)
    millis = int(time.time() * 1000)
    return f"rank_{rank_label}_{self.proc_name}.msg.{millis}"
6479
def _cleanup_old_backup_files(self):
    """Delete the oldest backup files, keeping at most ``self.max_backup_files``.

    The candidates come from ``self._get_backup_files()``, which lists them
    oldest first; everything before the last ``max_backup_files`` entries is
    removed from ``self.file_path``.  A limit of zero (or less) keeps none.

    Removal is best-effort: a file that cannot be deleted is reported on
    stderr and the remaining files are still processed.
    """
    backup_files = self._get_backup_files()
    keep = max(self.max_backup_files, 0)
    # Slice by an explicit count from the front.  The former
    # ``backup_files[:-keep]`` is ``backup_files[:0]`` when keep == 0, i.e.
    # it deleted nothing exactly when nothing was supposed to be kept.
    excess = max(len(backup_files) - keep, 0)
    for old_file in backup_files[:excess]:
        try:
            os.remove(os.path.join(self.file_path, old_file))
        except OSError as e:
            sys.stderr.write(f"Failed to remove backup file {old_file}: {e}\n")
            sys.stderr.flush()
8389
@@ -156,7 +162,7 @@ def __init__(self, log_message: str):
156162 # Convert asctime to a datetime object, then to a Unix timestamp
157163 dt = datetime .strptime (value , '%Y-%m-%d %H:%M:%S,%f' )
158164 timestamp = int (dt .timestamp ())
159- self .hash_table [key ] = value
165+ self .hash_table [key ] = timestamp
160166 else :
161167 self .hash_table [key ] = value
162168
0 commit comments