Skip to content

Commit 7b7cfdf

Browse files
authored
Improve logging and error handling (#31)
1 parent c67f473 commit 7b7cfdf

File tree

2 files changed

+34
-29
lines changed

2 files changed

+34
-29
lines changed

KubernetesLogFormatter.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,10 @@ def get_kubernetes_namespace():
1515

1616
class KubernetesLogFormatter(logging.Formatter):
1717
def format(self, record):
18-
log_record = {
19-
"timestamp": self.formatTime(record, self.datefmt),
20-
"level": record.levelname,
21-
"message": record.getMessage(),
22-
"pod_name": os.getenv('HOSTNAME', 'unknown-pod'),
23-
"namespace": get_kubernetes_namespace(),
24-
}
25-
return json.dumps(log_record)
18+
timestamp = self.formatTime(record, self.datefmt or "%Y-%m-%d %H:%M:%S")
19+
level = record.levelname
20+
message = record.getMessage()
21+
pod_name = os.getenv('HOSTNAME', 'unknown-pod')
22+
namespace = get_kubernetes_namespace()
23+
24+
return f"[{timestamp}] [{level}] [{namespace}/{pod_name}] {message}"

main.py

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -94,26 +94,29 @@ def poll_github_actions_and_allocate_runners(token, sleep_time=5):
9494
global POLLED_WITHOUT_ALLOCATING
9595

9696
while True:
97-
something_allocated = False
98-
99-
for repo in REPOS_TO_MONITOR:
100-
queued_url = f"{repo['api_base_url']}/actions/runs?status=queued"
101-
data, _ = get_gh_api(queued_url, token)
102-
103-
if data:
104-
new_allocations = allocate_runners_for_jobs(
105-
workflow_data=data,
106-
token=token,
107-
repo_api_base_url=repo['api_base_url'],
108-
repo_url=repo['repo_url'],
109-
repo_name=repo['name']
110-
)
111-
if new_allocations > 0:
112-
something_allocated = True
113-
114-
if not something_allocated and not POLLED_WITHOUT_ALLOCATING:
115-
logger.info("Polling for queued workflows...")
116-
POLLED_WITHOUT_ALLOCATING = True
97+
try:
98+
something_allocated = False
99+
100+
for repo in REPOS_TO_MONITOR:
101+
queued_url = f"{repo['api_base_url']}/actions/runs?status=queued"
102+
data, _ = get_gh_api(queued_url, token)
103+
104+
if data:
105+
new_allocations = allocate_runners_for_jobs(
106+
workflow_data=data,
107+
token=token,
108+
repo_api_base_url=repo['api_base_url'],
109+
repo_url=repo['repo_url'],
110+
repo_name=repo['name']
111+
)
112+
if new_allocations > 0:
113+
something_allocated = True
114+
115+
if not something_allocated and not POLLED_WITHOUT_ALLOCATING:
116+
logger.info("Polling for queued workflows...")
117+
POLLED_WITHOUT_ALLOCATING = True
118+
except Exception as e:
119+
logger.error(f"Exception in poll_github_actions_and_allocate_runners: {e}")
117120

118121
time.sleep(sleep_time)
119122

@@ -384,7 +387,10 @@ def poll_slurm_statuses(sleep_time=5):
384387
Wrapper function to poll check_slurm_status.
385388
"""
386389
while True:
387-
check_slurm_status()
390+
try:
391+
check_slurm_status()
392+
except Exception as e:
393+
logger.error(f"Exception in poll_slurm_statuses: {e}")
388394
time.sleep(sleep_time)
389395

390396
if __name__ == "__main__":

0 commit comments

Comments
 (0)