Skip to content

Commit 2f7df7b

Browse files
committed
fix: correct wrong stats in JobQueue logs
1 parent 1f4d6a0 commit 2f7df7b

1 file changed

Lines changed: 12 additions & 8 deletions

File tree

src/mmore/job_queue.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,22 +72,22 @@ def __init__(
7272
devices: Optional[list[str]] = None,
7373
):
7474
self.devices = devices or _detect_devices()
75-
n_workers = len(self.devices) * jobs_per_gpu
76-
self.max_queue_size = max_queue_size or n_workers * 10
75+
self.n_workers = len(self.devices) * jobs_per_gpu
76+
self.max_queue_size = max_queue_size or self.n_workers * 10
7777

7878
self._device_pool: queue.Queue[str] = queue.Queue()
7979
for _ in range(jobs_per_gpu):
8080
for device in self.devices:
8181
self._device_pool.put(device)
8282

83-
self._executor = ThreadPoolExecutor(max_workers=n_workers)
83+
self._executor = ThreadPoolExecutor(max_workers=self.n_workers)
8484
self._jobs: dict[str, Job] = {}
8585
self._reserved: set[str] = set()
8686
self._lock = threading.Lock()
8787

8888
logger.info(
8989
"[JobQueue] ready: %d worker(s) on %s, max_queue=%d",
90-
n_workers,
90+
self.n_workers,
9191
self.devices,
9292
self.max_queue_size,
9393
)
@@ -111,12 +111,10 @@ def submit(
111111
self._jobs[job_id] = Job(id=job_id, file_id=file_id, filename=filename)
112112

113113
logger.info(
114-
"[JobQueue] job %s queued (file_id=%s, filename=%s), gpus free: %d/%d",
114+
"[JobQueue] job %s queued (file_id=%s, filename=%s)",
115115
job_id,
116116
file_id,
117117
filename,
118-
self._device_pool.qsize(),
119-
len(self.devices),
120118
)
121119
self._executor.submit(self._run, job_id, work_fn)
122120
return job_id
@@ -133,7 +131,13 @@ def _run(self, job_id: str, work_fn: Callable[[str], dict]) -> None:
133131
job.device = device
134132
job.status = JobStatus.PROCESSING
135133
job.started_at = time.time()
136-
logger.info("[JobQueue] job %s processing (gpu=%s)", job_id, device)
134+
logger.info(
135+
"[JobQueue] job %s processing (gpu=%s), free slots: %d/%d",
136+
job_id,
137+
device,
138+
self._device_pool.qsize(),
139+
self.n_workers,
140+
)
137141

138142
result = None
139143
error = None

0 commit comments

Comments
 (0)