
Commit 99761bf

committed
#582: JSONTaskLister: Improve self-documentation and add proper member initialization
1 parent 1c6be24 commit 99761bf

File tree

1 file changed: +59 -15 lines changed


src/lbaf/Utils/lbsJSONTaskLister.py

Lines changed: 59 additions & 15 deletions
@@ -41,9 +41,8 @@
 #@HEADER
 #
 """
-Utility to generate a yaml containing lists of tasks associated to their respective ranks,
+Utility to generate a YAML file containing lists of tasks associated with their respective ranks,
 from the last phase and last sub-iteration of input JSON files.
-
 """
 
 import os
@@ -53,67 +52,112 @@
 
 from lbaf.IO.lbsVTDataReader import LoadReader
 from lbaf.Utils.lbsLogging import get_logger, Logger
+from typing import Optional
 
 class JSONTaskLister:
+    """
+    A utility class to process JSON files, extract tasks for each rank, and save the results in a YAML file.
+    """
+
+    def __init__(self, logger: Optional[Logger] = None):
+        """
+        Initializes an instance of the JSONTaskLister class.
+
+        Args:
+            logger (Optional[Logger]): A logger instance for logging messages. If not provided, a default logger is used.
+        """
+        self.__logger = logger if logger is not None else get_logger()
+        self.__directory = ""  # Directory containing the input JSON files
+        self.__file_stem = "data"  # Default file stem for JSON files
+        self.__file_suffix = "json"  # Default file suffix for JSON files
+        self.__output_file = "tasks.yaml"  # Default name of the output YAML file
+
     def __process_files(self):
+        """
+        Processes the JSON files in the specified directory to extract tasks for each rank.
+
+        Returns:
+            dict: A dictionary where keys are ranks and values are lists of tasks.
+        """
+        # Initialize the JSON data reader
         reader = LoadReader(
-            file_prefix = self.__directory + self.__file_stem,
-            logger = self.__logger,
-            file_suffix = self.__file_suffix
+            file_prefix=self.__directory + self.__file_stem,
+            logger=self.__logger,
+            file_suffix=self.__file_suffix
         )
 
-        tasks = {}
-        n_ranks = reader.n_ranks
+        tasks = {}  # Dictionary to store tasks by rank
+        n_ranks = reader.n_ranks  # Get the total number of ranks
 
         try:
+            # Iterate over each rank
             for rank in range(n_ranks):
-                _, data = reader._load_vt_file(rank)
-                phases = data.get("phases", [])
+                _, data = reader._load_vt_file(rank)  # Load JSON data for the current rank
+                phases = data.get("phases", [])  # Extract phases from the data
+
                 if not phases:
                     self.__logger.warning(f"No phases found for rank {rank}")
                     continue
 
-                last_phase = phases[-1]
+                last_phase = phases[-1]  # Get the last phase
 
+                # Check if there are load balancing iterations in the last phase
                 if "lb_iterations" in last_phase:
                     lb_iterations = last_phase["lb_iterations"]
+
                     if lb_iterations:
+                        # Extract tasks from the last load balancing iteration
                         last_lb_iteration = lb_iterations[-1]
-                        iteration_tasks = [task["entity"].get("seq_id", task["entity"].get("id")) for task in last_lb_iteration.get("tasks", [])]
+                        iteration_tasks = [
+                            task["entity"].get("seq_id", task["entity"].get("id"))
+                            for task in last_lb_iteration.get("tasks", [])
+                        ]
                         tasks[rank] = iteration_tasks
                     else:
                         self.__logger.warning(f"No lb_iterations found in the last phase of rank {rank}")
                 else:
-                    phase_tasks = [task["entity"].get("seq_id", task["entity"].get("id")) for task in last_phase.get("tasks", [])]
+                    # Extract tasks directly from the last phase if no lb_iterations exist
+                    phase_tasks = [
+                        task["entity"].get("seq_id", task["entity"].get("id"))
+                        for task in last_phase.get("tasks", [])
+                    ]
                     tasks[rank] = phase_tasks
+
         except (json.JSONDecodeError, KeyError, ValueError, IndexError) as e:
             self.__logger.error(f"Error processing rank {rank}: {e}")
             return
 
         return tasks
 
     def run(self):
+        """
+        Main entry point for the JSONTaskLister utility. Parses command-line arguments,
+        processes JSON files, and writes the extracted tasks to a YAML file.
+        """
+        # Parse command-line arguments
         parser = argparse.ArgumentParser(description="Extract tasks from JSON files.")
         parser.add_argument("directory", type=str, help="Directory containing JSON files.")
         parser.add_argument("--file-stem", type=str, default="data", help="File stem for JSON files (default: 'data').")
         parser.add_argument("--file-suffix", type=str, default="json", help="File suffix for JSON files (default: 'json').")
-        parser.add_argument("--output", type=str, default="tasks.yml", help="Output YAML file (default: 'tasks.yml').")
+        parser.add_argument("--output", type=str, default="tasks.yaml", help="Output YAML file (default: 'tasks.yaml').")
 
         args = parser.parse_args()
 
+        # Set instance variables based on parsed arguments
         self.__directory = args.directory
         self.__file_stem = args.file_stem
         self.__file_suffix = args.file_suffix
         self.__output_file = args.output
 
-        self.__logger = get_logger()
-
+        # Validate the directory
         if not os.path.isdir(self.__directory):
             self.__logger.error(f"Directory not found: {self.__directory}")
             return
 
+        # Process files and extract tasks
         tasks = self.__process_files()
 
+        # Write the extracted tasks to the output YAML file
         try:
             with open(self.__output_file, 'w') as file:
                 yaml.safe_dump(tasks, file)
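
For readers skimming the diff: the rule in __process_files takes the last phase of each rank's data, prefers the last entry of lb_iterations when that key is present and non-empty, and maps each task to its entity's seq_id, falling back to id. The following is a minimal, self-contained sketch of that rule; the helper name extract_last_tasks and the sample record are hypothetical and only mirror the shape of data this code expects, not real vt output.

# Illustrative sketch of the per-rank extraction rule added in this commit.
# The sample record is hypothetical; real vt JSON data carries more fields.
sample_rank_data = {
    "phases": [
        {  # only the last phase is consulted
            "lb_iterations": [
                {  # only the last sub-iteration is consulted
                    "tasks": [
                        {"entity": {"seq_id": 0}},
                        {"entity": {"id": 7}},  # no "seq_id": fall back to "id"
                    ]
                }
            ]
        }
    ]
}

def extract_last_tasks(data: dict) -> list:
    """Mirror the per-rank logic of JSONTaskLister.__process_files."""
    phases = data.get("phases", [])
    if not phases:
        return []
    last_phase = phases[-1]
    if "lb_iterations" in last_phase:
        lb_iterations = last_phase["lb_iterations"]
        if not lb_iterations:
            return []  # the utility logs a warning and records nothing for this rank
        source = lb_iterations[-1]
    else:
        source = last_phase  # no lb_iterations: read tasks directly from the phase
    return [
        task["entity"].get("seq_id", task["entity"].get("id"))
        for task in source.get("tasks", [])
    ]

print(extract_last_tasks(sample_rank_data))  # [0, 7]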

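Since run() parses sys.argv itself, a driver only needs to construct the class and call run(). A hedged usage sketch follows; the driver file name and paths are placeholders, and whether the module ships its own __main__ entry point is not shown in this diff.

# list_tasks.py -- hypothetical driver script (file name is a placeholder)
from lbaf.Utils.lbsJSONTaskLister import JSONTaskLister

# run() reads sys.argv, so a typical invocation would be:
#   python list_tasks.py /path/to/json/dir --file-stem data --file-suffix json --output tasks.yaml
#
# With PyYAML's default block style, tasks.yaml then maps each rank to its
# task identifiers, e.g. (values illustrative):
#   0:
#   - 0
#   - 1
#   1:
#   - 2
#   - 3
JSONTaskLister().run()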